|
{ |
|
"best_metric": 1.322691559791565, |
|
"best_model_checkpoint": "distilbert_sa_pre-training-complete/checkpoint-293068", |
|
"epoch": 167.87912702853944, |
|
"global_step": 300000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9719239746582195e-05, |
|
"loss": 1.8819, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6569985974754559, |
|
"eval_loss": 1.634997844696045, |
|
"eval_runtime": 1.7494, |
|
"eval_samples_per_second": 273.804, |
|
"eval_steps_per_second": 2.286, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.942130710236746e-05, |
|
"loss": 1.7699, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.666442067435919, |
|
"eval_loss": 1.5847446918487549, |
|
"eval_runtime": 1.6443, |
|
"eval_samples_per_second": 291.312, |
|
"eval_steps_per_second": 2.433, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.912337445815272e-05, |
|
"loss": 1.7308, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6677533506088217, |
|
"eval_loss": 1.5856640338897705, |
|
"eval_runtime": 1.5899, |
|
"eval_samples_per_second": 301.279, |
|
"eval_steps_per_second": 2.516, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.882544181393798e-05, |
|
"loss": 1.7062, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6683528823169714, |
|
"eval_loss": 1.5678603649139404, |
|
"eval_runtime": 1.7076, |
|
"eval_samples_per_second": 280.506, |
|
"eval_steps_per_second": 2.342, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.852750916972325e-05, |
|
"loss": 1.6858, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6689400715563506, |
|
"eval_loss": 1.557257056236267, |
|
"eval_runtime": 1.6041, |
|
"eval_samples_per_second": 298.603, |
|
"eval_steps_per_second": 2.494, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.822974324774925e-05, |
|
"loss": 1.6684, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6686847832152224, |
|
"eval_loss": 1.5590558052062988, |
|
"eval_runtime": 1.6561, |
|
"eval_samples_per_second": 289.241, |
|
"eval_steps_per_second": 2.415, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.793181060353452e-05, |
|
"loss": 1.6545, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6756023841134431, |
|
"eval_loss": 1.525283694267273, |
|
"eval_runtime": 1.6268, |
|
"eval_samples_per_second": 294.451, |
|
"eval_steps_per_second": 2.459, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.763387795931978e-05, |
|
"loss": 1.6438, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6748053099797688, |
|
"eval_loss": 1.5303529500961304, |
|
"eval_runtime": 1.6169, |
|
"eval_samples_per_second": 296.24, |
|
"eval_steps_per_second": 2.474, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.7336112037345786e-05, |
|
"loss": 1.631, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6805022280765674, |
|
"eval_loss": 1.4976379871368408, |
|
"eval_runtime": 1.865, |
|
"eval_samples_per_second": 256.834, |
|
"eval_steps_per_second": 2.145, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.7038179393131046e-05, |
|
"loss": 1.6236, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.677310686482661, |
|
"eval_loss": 1.5153287649154663, |
|
"eval_runtime": 1.6084, |
|
"eval_samples_per_second": 297.811, |
|
"eval_steps_per_second": 2.487, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.6740413471157055e-05, |
|
"loss": 1.613, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6785895056521005, |
|
"eval_loss": 1.5010405778884888, |
|
"eval_runtime": 1.613, |
|
"eval_samples_per_second": 296.958, |
|
"eval_steps_per_second": 2.48, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.6442480826942315e-05, |
|
"loss": 1.6046, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6817956622261367, |
|
"eval_loss": 1.5068795680999756, |
|
"eval_runtime": 1.617, |
|
"eval_samples_per_second": 296.234, |
|
"eval_steps_per_second": 2.474, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.6144714904968324e-05, |
|
"loss": 1.5963, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6804745455575061, |
|
"eval_loss": 1.4795995950698853, |
|
"eval_runtime": 1.5836, |
|
"eval_samples_per_second": 302.481, |
|
"eval_steps_per_second": 2.526, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.5846948982994333e-05, |
|
"loss": 1.5906, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6809435655783687, |
|
"eval_loss": 1.485795497894287, |
|
"eval_runtime": 1.6062, |
|
"eval_samples_per_second": 298.221, |
|
"eval_steps_per_second": 2.49, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.554918306102034e-05, |
|
"loss": 1.5833, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6805767364480798, |
|
"eval_loss": 1.496237874031067, |
|
"eval_runtime": 1.5896, |
|
"eval_samples_per_second": 301.324, |
|
"eval_steps_per_second": 2.516, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.52512504168056e-05, |
|
"loss": 1.5771, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6826146494145264, |
|
"eval_loss": 1.4748649597167969, |
|
"eval_runtime": 1.5864, |
|
"eval_samples_per_second": 301.949, |
|
"eval_steps_per_second": 2.521, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.495365121707236e-05, |
|
"loss": 1.5703, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6873812451100928, |
|
"eval_loss": 1.4724414348602295, |
|
"eval_runtime": 1.6157, |
|
"eval_samples_per_second": 296.46, |
|
"eval_steps_per_second": 2.476, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.465571857285762e-05, |
|
"loss": 1.5663, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6826915033044254, |
|
"eval_loss": 1.4945769309997559, |
|
"eval_runtime": 1.6046, |
|
"eval_samples_per_second": 298.512, |
|
"eval_steps_per_second": 2.493, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 4.435795265088363e-05, |
|
"loss": 1.5614, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6798307475317349, |
|
"eval_loss": 1.493360161781311, |
|
"eval_runtime": 1.5976, |
|
"eval_samples_per_second": 299.828, |
|
"eval_steps_per_second": 2.504, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.406002000666889e-05, |
|
"loss": 1.5558, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6893048578564813, |
|
"eval_loss": 1.4512126445770264, |
|
"eval_runtime": 1.5875, |
|
"eval_samples_per_second": 301.731, |
|
"eval_steps_per_second": 2.52, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 4.37622540846949e-05, |
|
"loss": 1.5508, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6920911905432029, |
|
"eval_loss": 1.4417309761047363, |
|
"eval_runtime": 1.5842, |
|
"eval_samples_per_second": 302.355, |
|
"eval_steps_per_second": 2.525, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 4.346448816272091e-05, |
|
"loss": 1.5466, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.683659548403263, |
|
"eval_loss": 1.4744044542312622, |
|
"eval_runtime": 1.688, |
|
"eval_samples_per_second": 283.775, |
|
"eval_steps_per_second": 2.37, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 4.316672224074692e-05, |
|
"loss": 1.5427, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6903853118992561, |
|
"eval_loss": 1.4530385732650757, |
|
"eval_runtime": 1.5701, |
|
"eval_samples_per_second": 305.072, |
|
"eval_steps_per_second": 2.548, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.286878959653218e-05, |
|
"loss": 1.5382, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6900174638048561, |
|
"eval_loss": 1.4377799034118652, |
|
"eval_runtime": 1.5785, |
|
"eval_samples_per_second": 303.458, |
|
"eval_steps_per_second": 2.534, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.257119039679893e-05, |
|
"loss": 1.5342, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6820495514497034, |
|
"eval_loss": 1.4731613397598267, |
|
"eval_runtime": 1.5702, |
|
"eval_samples_per_second": 305.059, |
|
"eval_steps_per_second": 2.547, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.2273257752584196e-05, |
|
"loss": 1.5293, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6865246225483279, |
|
"eval_loss": 1.460905909538269, |
|
"eval_runtime": 1.5669, |
|
"eval_samples_per_second": 305.693, |
|
"eval_steps_per_second": 2.553, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.1975491830610205e-05, |
|
"loss": 1.5259, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6942026931254429, |
|
"eval_loss": 1.4399933815002441, |
|
"eval_runtime": 1.5672, |
|
"eval_samples_per_second": 305.645, |
|
"eval_steps_per_second": 2.552, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.1677559186395465e-05, |
|
"loss": 1.5223, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6879446584741712, |
|
"eval_loss": 1.458624243736267, |
|
"eval_runtime": 1.6351, |
|
"eval_samples_per_second": 292.944, |
|
"eval_steps_per_second": 2.446, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 4.1379793264421474e-05, |
|
"loss": 1.5197, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6906005955390753, |
|
"eval_loss": 1.437148094177246, |
|
"eval_runtime": 1.5725, |
|
"eval_samples_per_second": 304.606, |
|
"eval_steps_per_second": 2.544, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.1082194064688226e-05, |
|
"loss": 1.5161, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6908153728511145, |
|
"eval_loss": 1.4334877729415894, |
|
"eval_runtime": 1.5744, |
|
"eval_samples_per_second": 304.247, |
|
"eval_steps_per_second": 2.541, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 4.078426142047349e-05, |
|
"loss": 1.5126, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6977723406071297, |
|
"eval_loss": 1.4158803224563599, |
|
"eval_runtime": 1.576, |
|
"eval_samples_per_second": 303.927, |
|
"eval_steps_per_second": 2.538, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 4.048632877625875e-05, |
|
"loss": 1.5087, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6858260773724236, |
|
"eval_loss": 1.4411636590957642, |
|
"eval_runtime": 1.6849, |
|
"eval_samples_per_second": 284.296, |
|
"eval_steps_per_second": 2.374, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 4.018856285428476e-05, |
|
"loss": 1.5061, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6957515878616796, |
|
"eval_loss": 1.418090581893921, |
|
"eval_runtime": 1.5743, |
|
"eval_samples_per_second": 304.26, |
|
"eval_steps_per_second": 2.541, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.989079693231077e-05, |
|
"loss": 1.5035, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6902565822572531, |
|
"eval_loss": 1.4357043504714966, |
|
"eval_runtime": 1.5776, |
|
"eval_samples_per_second": 303.629, |
|
"eval_steps_per_second": 2.536, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.959319773257753e-05, |
|
"loss": 1.5011, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6913778077647522, |
|
"eval_loss": 1.4147114753723145, |
|
"eval_runtime": 1.7484, |
|
"eval_samples_per_second": 273.959, |
|
"eval_steps_per_second": 2.288, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 3.929526508836279e-05, |
|
"loss": 1.4967, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6931245246880545, |
|
"eval_loss": 1.4251492023468018, |
|
"eval_runtime": 1.567, |
|
"eval_samples_per_second": 305.681, |
|
"eval_steps_per_second": 2.553, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 3.89974991663888e-05, |
|
"loss": 1.4954, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6909720283624589, |
|
"eval_loss": 1.4366341829299927, |
|
"eval_runtime": 1.6092, |
|
"eval_samples_per_second": 297.654, |
|
"eval_steps_per_second": 2.486, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 3.869989996665556e-05, |
|
"loss": 1.4925, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6896388757610297, |
|
"eval_loss": 1.4488193988800049, |
|
"eval_runtime": 1.6299, |
|
"eval_samples_per_second": 293.876, |
|
"eval_steps_per_second": 2.454, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 3.840196732244081e-05, |
|
"loss": 1.4889, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6906871760575155, |
|
"eval_loss": 1.4335614442825317, |
|
"eval_runtime": 1.5829, |
|
"eval_samples_per_second": 302.612, |
|
"eval_steps_per_second": 2.527, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.810403467822608e-05, |
|
"loss": 1.4866, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6944786305661221, |
|
"eval_loss": 1.4226477146148682, |
|
"eval_runtime": 1.5748, |
|
"eval_samples_per_second": 304.158, |
|
"eval_steps_per_second": 2.54, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 3.7806268756252086e-05, |
|
"loss": 1.4838, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6950605409075478, |
|
"eval_loss": 1.4210412502288818, |
|
"eval_runtime": 1.6183, |
|
"eval_samples_per_second": 295.996, |
|
"eval_steps_per_second": 2.472, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 3.7508502834278095e-05, |
|
"loss": 1.4821, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6944428883535937, |
|
"eval_loss": 1.4080859422683716, |
|
"eval_runtime": 1.7239, |
|
"eval_samples_per_second": 277.863, |
|
"eval_steps_per_second": 2.32, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 3.72107369123041e-05, |
|
"loss": 1.4792, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6895113064278363, |
|
"eval_loss": 1.4496194124221802, |
|
"eval_runtime": 1.6266, |
|
"eval_samples_per_second": 294.479, |
|
"eval_steps_per_second": 2.459, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 3.691297099033011e-05, |
|
"loss": 1.4778, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6925052337752966, |
|
"eval_loss": 1.4263415336608887, |
|
"eval_runtime": 1.5801, |
|
"eval_samples_per_second": 303.146, |
|
"eval_steps_per_second": 2.531, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 3.6615038346115374e-05, |
|
"loss": 1.4754, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6981779024625238, |
|
"eval_loss": 1.3994964361190796, |
|
"eval_runtime": 1.5981, |
|
"eval_samples_per_second": 299.73, |
|
"eval_steps_per_second": 2.503, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 3.631727242414138e-05, |
|
"loss": 1.4736, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6992426773582804, |
|
"eval_loss": 1.3957608938217163, |
|
"eval_runtime": 1.5875, |
|
"eval_samples_per_second": 301.728, |
|
"eval_steps_per_second": 2.52, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 3.6019506502167385e-05, |
|
"loss": 1.4702, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6988620438365379, |
|
"eval_loss": 1.4073089361190796, |
|
"eval_runtime": 1.5808, |
|
"eval_samples_per_second": 303.008, |
|
"eval_steps_per_second": 2.53, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 3.572157385795265e-05, |
|
"loss": 1.4683, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6995880128675434, |
|
"eval_loss": 1.3917019367218018, |
|
"eval_runtime": 1.5795, |
|
"eval_samples_per_second": 303.265, |
|
"eval_steps_per_second": 2.532, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 3.5423974658219404e-05, |
|
"loss": 1.4663, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6956227054504377, |
|
"eval_loss": 1.4108972549438477, |
|
"eval_runtime": 1.5763, |
|
"eval_samples_per_second": 303.882, |
|
"eval_steps_per_second": 2.538, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.512604201400467e-05, |
|
"loss": 1.4648, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6990693739424704, |
|
"eval_loss": 1.4003111124038696, |
|
"eval_runtime": 1.5784, |
|
"eval_samples_per_second": 303.472, |
|
"eval_steps_per_second": 2.534, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 3.482810936978993e-05, |
|
"loss": 1.4619, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7039692846253741, |
|
"eval_loss": 1.3846591711044312, |
|
"eval_runtime": 1.5968, |
|
"eval_samples_per_second": 299.97, |
|
"eval_steps_per_second": 2.505, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 3.453034344781594e-05, |
|
"loss": 1.4599, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6936358731150427, |
|
"eval_loss": 1.432299256324768, |
|
"eval_runtime": 1.5811, |
|
"eval_samples_per_second": 302.963, |
|
"eval_steps_per_second": 2.53, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 3.423257752584195e-05, |
|
"loss": 1.4596, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6986458829796182, |
|
"eval_loss": 1.3935346603393555, |
|
"eval_runtime": 1.6107, |
|
"eval_samples_per_second": 297.395, |
|
"eval_steps_per_second": 2.483, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 3.393481160386796e-05, |
|
"loss": 1.4567, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6973647322680123, |
|
"eval_loss": 1.4121975898742676, |
|
"eval_runtime": 1.5718, |
|
"eval_samples_per_second": 304.748, |
|
"eval_steps_per_second": 2.545, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 3.363704568189397e-05, |
|
"loss": 1.455, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.697701760770372, |
|
"eval_loss": 1.3974732160568237, |
|
"eval_runtime": 1.5964, |
|
"eval_samples_per_second": 300.059, |
|
"eval_steps_per_second": 2.506, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.333927975991997e-05, |
|
"loss": 1.4533, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7003512716032037, |
|
"eval_loss": 1.3907233476638794, |
|
"eval_runtime": 1.5952, |
|
"eval_samples_per_second": 300.282, |
|
"eval_steps_per_second": 2.508, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 3.3041680560186735e-05, |
|
"loss": 1.4513, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6997069085833915, |
|
"eval_loss": 1.3957313299179077, |
|
"eval_runtime": 1.5802, |
|
"eval_samples_per_second": 303.121, |
|
"eval_steps_per_second": 2.531, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 3.274374791597199e-05, |
|
"loss": 1.45, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7000896509217236, |
|
"eval_loss": 1.390682578086853, |
|
"eval_runtime": 1.6316, |
|
"eval_samples_per_second": 293.58, |
|
"eval_steps_per_second": 2.452, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 3.2445815271757255e-05, |
|
"loss": 1.4466, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.698792492819097, |
|
"eval_loss": 1.4062516689300537, |
|
"eval_runtime": 1.5938, |
|
"eval_samples_per_second": 300.532, |
|
"eval_steps_per_second": 2.51, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 3.214821607202401e-05, |
|
"loss": 1.4455, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7036078889537957, |
|
"eval_loss": 1.3599776029586792, |
|
"eval_runtime": 1.5863, |
|
"eval_samples_per_second": 301.968, |
|
"eval_steps_per_second": 2.522, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 3.1850283427809266e-05, |
|
"loss": 1.4439, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7015180371947121, |
|
"eval_loss": 1.3940563201904297, |
|
"eval_runtime": 1.5866, |
|
"eval_samples_per_second": 301.902, |
|
"eval_steps_per_second": 2.521, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 3.155235078359453e-05, |
|
"loss": 1.4432, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7016475844736108, |
|
"eval_loss": 1.3854174613952637, |
|
"eval_runtime": 1.5895, |
|
"eval_samples_per_second": 301.358, |
|
"eval_steps_per_second": 2.517, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 3.125458486162054e-05, |
|
"loss": 1.4404, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.6972722913316973, |
|
"eval_loss": 1.4080028533935547, |
|
"eval_runtime": 1.588, |
|
"eval_samples_per_second": 301.645, |
|
"eval_steps_per_second": 2.519, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 3.09566522174058e-05, |
|
"loss": 1.4397, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7010520409594614, |
|
"eval_loss": 1.3923581838607788, |
|
"eval_runtime": 1.6035, |
|
"eval_samples_per_second": 298.723, |
|
"eval_steps_per_second": 2.495, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 3.0659053017672554e-05, |
|
"loss": 1.4366, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7049133849337983, |
|
"eval_loss": 1.3872087001800537, |
|
"eval_runtime": 1.5917, |
|
"eval_samples_per_second": 300.927, |
|
"eval_steps_per_second": 2.513, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 3.0361287095698566e-05, |
|
"loss": 1.4358, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7010343677010343, |
|
"eval_loss": 1.3924205303192139, |
|
"eval_runtime": 1.5814, |
|
"eval_samples_per_second": 302.891, |
|
"eval_steps_per_second": 2.529, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 3.0063354451483826e-05, |
|
"loss": 1.4339, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.6988593369019926, |
|
"eval_loss": 1.3895306587219238, |
|
"eval_runtime": 1.7118, |
|
"eval_samples_per_second": 279.814, |
|
"eval_steps_per_second": 2.337, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 2.9765588529509835e-05, |
|
"loss": 1.4329, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7001914091088217, |
|
"eval_loss": 1.4007033109664917, |
|
"eval_runtime": 1.5846, |
|
"eval_samples_per_second": 302.287, |
|
"eval_steps_per_second": 2.524, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 2.9467822607535845e-05, |
|
"loss": 1.4319, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7047012042076765, |
|
"eval_loss": 1.3672277927398682, |
|
"eval_runtime": 1.6189, |
|
"eval_samples_per_second": 295.88, |
|
"eval_steps_per_second": 2.471, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 2.9169889963321108e-05, |
|
"loss": 1.4287, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7020639054580078, |
|
"eval_loss": 1.3891570568084717, |
|
"eval_runtime": 1.6012, |
|
"eval_samples_per_second": 299.158, |
|
"eval_steps_per_second": 2.498, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 2.8872290763587863e-05, |
|
"loss": 1.4284, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7016447644932612, |
|
"eval_loss": 1.367444396018982, |
|
"eval_runtime": 1.5982, |
|
"eval_samples_per_second": 299.72, |
|
"eval_steps_per_second": 2.503, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 2.8574358119373123e-05, |
|
"loss": 1.4265, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.6961661071458798, |
|
"eval_loss": 1.3982675075531006, |
|
"eval_runtime": 1.5745, |
|
"eval_samples_per_second": 304.228, |
|
"eval_steps_per_second": 2.541, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 2.8276592197399132e-05, |
|
"loss": 1.4254, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.700606782352274, |
|
"eval_loss": 1.3664897680282593, |
|
"eval_runtime": 1.5897, |
|
"eval_samples_per_second": 301.308, |
|
"eval_steps_per_second": 2.516, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 2.797882627542514e-05, |
|
"loss": 1.4231, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7003467887363019, |
|
"eval_loss": 1.3851745128631592, |
|
"eval_runtime": 1.602, |
|
"eval_samples_per_second": 299.009, |
|
"eval_steps_per_second": 2.497, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.76808936312104e-05, |
|
"loss": 1.4228, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.6945481910920341, |
|
"eval_loss": 1.4327718019485474, |
|
"eval_runtime": 1.6912, |
|
"eval_samples_per_second": 283.237, |
|
"eval_steps_per_second": 2.365, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 2.738312770923641e-05, |
|
"loss": 1.4217, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7030551729953062, |
|
"eval_loss": 1.3831229209899902, |
|
"eval_runtime": 1.5761, |
|
"eval_samples_per_second": 303.914, |
|
"eval_steps_per_second": 2.538, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 2.708536178726242e-05, |
|
"loss": 1.4198, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7017066534751423, |
|
"eval_loss": 1.3897149562835693, |
|
"eval_runtime": 1.5767, |
|
"eval_samples_per_second": 303.804, |
|
"eval_steps_per_second": 2.537, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 2.6787429143047683e-05, |
|
"loss": 1.4183, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7027269918836184, |
|
"eval_loss": 1.3789587020874023, |
|
"eval_runtime": 1.5653, |
|
"eval_samples_per_second": 306.009, |
|
"eval_steps_per_second": 2.555, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 2.6489496498832943e-05, |
|
"loss": 1.417, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7063395950094348, |
|
"eval_loss": 1.3697491884231567, |
|
"eval_runtime": 1.5854, |
|
"eval_samples_per_second": 302.136, |
|
"eval_steps_per_second": 2.523, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 2.6191730576858952e-05, |
|
"loss": 1.4179, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.700801300011207, |
|
"eval_loss": 1.3804936408996582, |
|
"eval_runtime": 1.5689, |
|
"eval_samples_per_second": 305.304, |
|
"eval_steps_per_second": 2.55, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 2.589396465488496e-05, |
|
"loss": 1.414, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7050930684024209, |
|
"eval_loss": 1.3579356670379639, |
|
"eval_runtime": 1.5646, |
|
"eval_samples_per_second": 306.14, |
|
"eval_steps_per_second": 2.556, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 2.559619873291097e-05, |
|
"loss": 1.4132, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7054125504382913, |
|
"eval_loss": 1.3726812601089478, |
|
"eval_runtime": 1.5755, |
|
"eval_samples_per_second": 304.04, |
|
"eval_steps_per_second": 2.539, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 2.5298599533177726e-05, |
|
"loss": 1.4112, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.7013713965855024, |
|
"eval_loss": 1.3762907981872559, |
|
"eval_runtime": 1.5766, |
|
"eval_samples_per_second": 303.809, |
|
"eval_steps_per_second": 2.537, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 2.5000666888962985e-05, |
|
"loss": 1.412, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7051213694401571, |
|
"eval_loss": 1.3806371688842773, |
|
"eval_runtime": 1.5857, |
|
"eval_samples_per_second": 302.069, |
|
"eval_steps_per_second": 2.523, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 2.4702734244748252e-05, |
|
"loss": 1.4103, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7021642142599828, |
|
"eval_loss": 1.3889108896255493, |
|
"eval_runtime": 1.5884, |
|
"eval_samples_per_second": 301.569, |
|
"eval_steps_per_second": 2.518, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 2.4405135045015007e-05, |
|
"loss": 1.4082, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6992571829011913, |
|
"eval_loss": 1.3886325359344482, |
|
"eval_runtime": 1.5712, |
|
"eval_samples_per_second": 304.856, |
|
"eval_steps_per_second": 2.546, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 2.4107369123041016e-05, |
|
"loss": 1.4068, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.7025410225589132, |
|
"eval_loss": 1.3720707893371582, |
|
"eval_runtime": 1.5911, |
|
"eval_samples_per_second": 301.051, |
|
"eval_steps_per_second": 2.514, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 2.3809436478826276e-05, |
|
"loss": 1.4068, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7049852548799326, |
|
"eval_loss": 1.3589400053024292, |
|
"eval_runtime": 1.5763, |
|
"eval_samples_per_second": 303.874, |
|
"eval_steps_per_second": 2.538, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 2.351150383461154e-05, |
|
"loss": 1.4043, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.7043347949159128, |
|
"eval_loss": 1.359857439994812, |
|
"eval_runtime": 1.5796, |
|
"eval_samples_per_second": 303.245, |
|
"eval_steps_per_second": 2.532, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 2.321373791263755e-05, |
|
"loss": 1.4042, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.7055383080164098, |
|
"eval_loss": 1.3674854040145874, |
|
"eval_runtime": 1.7188, |
|
"eval_samples_per_second": 278.687, |
|
"eval_steps_per_second": 2.327, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 2.2915971990663558e-05, |
|
"loss": 1.4033, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.7031184095829935, |
|
"eval_loss": 1.3719748258590698, |
|
"eval_runtime": 1.5775, |
|
"eval_samples_per_second": 303.654, |
|
"eval_steps_per_second": 2.536, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 2.2618206068689564e-05, |
|
"loss": 1.402, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7067319874989442, |
|
"eval_loss": 1.3505655527114868, |
|
"eval_runtime": 1.5905, |
|
"eval_samples_per_second": 301.16, |
|
"eval_steps_per_second": 2.515, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 2.2320273424474827e-05, |
|
"loss": 1.4004, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.701749972045175, |
|
"eval_loss": 1.3833116292953491, |
|
"eval_runtime": 1.5756, |
|
"eval_samples_per_second": 304.01, |
|
"eval_steps_per_second": 2.539, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 2.2022340780260087e-05, |
|
"loss": 1.4001, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.7020838620534078, |
|
"eval_loss": 1.3734411001205444, |
|
"eval_runtime": 1.5691, |
|
"eval_samples_per_second": 305.274, |
|
"eval_steps_per_second": 2.549, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 2.1724574858286096e-05, |
|
"loss": 1.398, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.7036638236377547, |
|
"eval_loss": 1.3791979551315308, |
|
"eval_runtime": 1.5625, |
|
"eval_samples_per_second": 306.555, |
|
"eval_steps_per_second": 2.56, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 2.1426975658552855e-05, |
|
"loss": 1.3983, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7049873914261698, |
|
"eval_loss": 1.3675692081451416, |
|
"eval_runtime": 1.5747, |
|
"eval_samples_per_second": 304.183, |
|
"eval_steps_per_second": 2.54, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 2.112904301433811e-05, |
|
"loss": 1.3966, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.7027283114566787, |
|
"eval_loss": 1.3888317346572876, |
|
"eval_runtime": 1.5809, |
|
"eval_samples_per_second": 302.986, |
|
"eval_steps_per_second": 2.53, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 2.0831110370123374e-05, |
|
"loss": 1.3953, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.7004118244386777, |
|
"eval_loss": 1.3927955627441406, |
|
"eval_runtime": 1.5702, |
|
"eval_samples_per_second": 305.058, |
|
"eval_steps_per_second": 2.547, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 2.0533344448149383e-05, |
|
"loss": 1.3934, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.7010918253079508, |
|
"eval_loss": 1.382420301437378, |
|
"eval_runtime": 1.5875, |
|
"eval_samples_per_second": 301.726, |
|
"eval_steps_per_second": 2.52, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.0235578526175393e-05, |
|
"loss": 1.3928, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.703971419076105, |
|
"eval_loss": 1.382891297340393, |
|
"eval_runtime": 1.5699, |
|
"eval_samples_per_second": 305.124, |
|
"eval_steps_per_second": 2.548, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 1.99378126042014e-05, |
|
"loss": 1.3919, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.7045689534721252, |
|
"eval_loss": 1.3534067869186401, |
|
"eval_runtime": 1.5763, |
|
"eval_samples_per_second": 303.883, |
|
"eval_steps_per_second": 2.538, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 1.9640213404468157e-05, |
|
"loss": 1.3902, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.7048191080299756, |
|
"eval_loss": 1.3559306859970093, |
|
"eval_runtime": 1.7782, |
|
"eval_samples_per_second": 269.367, |
|
"eval_steps_per_second": 2.249, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 1.934228076025342e-05, |
|
"loss": 1.3896, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.7044582060694371, |
|
"eval_loss": 1.3681745529174805, |
|
"eval_runtime": 1.5953, |
|
"eval_samples_per_second": 300.253, |
|
"eval_steps_per_second": 2.507, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 1.904451483827943e-05, |
|
"loss": 1.3895, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.7053175360772643, |
|
"eval_loss": 1.3642874956130981, |
|
"eval_runtime": 1.584, |
|
"eval_samples_per_second": 302.391, |
|
"eval_steps_per_second": 2.525, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 1.874658219406469e-05, |
|
"loss": 1.3884, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.7018311771061307, |
|
"eval_loss": 1.3854254484176636, |
|
"eval_runtime": 1.7105, |
|
"eval_samples_per_second": 280.037, |
|
"eval_steps_per_second": 2.339, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 1.8448816272090695e-05, |
|
"loss": 1.3878, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.7038266741699494, |
|
"eval_loss": 1.370182752609253, |
|
"eval_runtime": 1.5712, |
|
"eval_samples_per_second": 304.857, |
|
"eval_steps_per_second": 2.546, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 1.8151050350116704e-05, |
|
"loss": 1.3865, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.7055550824604729, |
|
"eval_loss": 1.3529911041259766, |
|
"eval_runtime": 1.587, |
|
"eval_samples_per_second": 301.822, |
|
"eval_steps_per_second": 2.52, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 1.7853284428142717e-05, |
|
"loss": 1.3842, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.7007661234155175, |
|
"eval_loss": 1.3804410696029663, |
|
"eval_runtime": 1.5953, |
|
"eval_samples_per_second": 300.25, |
|
"eval_steps_per_second": 2.507, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 1.7555351783927977e-05, |
|
"loss": 1.3836, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.7072856221792392, |
|
"eval_loss": 1.3507838249206543, |
|
"eval_runtime": 1.5928, |
|
"eval_samples_per_second": 300.728, |
|
"eval_steps_per_second": 2.511, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 1.7257585861953983e-05, |
|
"loss": 1.3831, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.7002392681210415, |
|
"eval_loss": 1.3821208477020264, |
|
"eval_runtime": 1.5673, |
|
"eval_samples_per_second": 305.62, |
|
"eval_steps_per_second": 2.552, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 1.6959819939979992e-05, |
|
"loss": 1.3828, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.7063221331514015, |
|
"eval_loss": 1.355231523513794, |
|
"eval_runtime": 1.5725, |
|
"eval_samples_per_second": 304.605, |
|
"eval_steps_per_second": 2.544, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.6662054018006005e-05, |
|
"loss": 1.382, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.7089512489136833, |
|
"eval_loss": 1.3567301034927368, |
|
"eval_runtime": 1.5868, |
|
"eval_samples_per_second": 301.856, |
|
"eval_steps_per_second": 2.521, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 1.6364288096032014e-05, |
|
"loss": 1.3806, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.7021602747892242, |
|
"eval_loss": 1.3915045261383057, |
|
"eval_runtime": 1.5651, |
|
"eval_samples_per_second": 306.042, |
|
"eval_steps_per_second": 2.556, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 1.6066355451817274e-05, |
|
"loss": 1.3802, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.7098393008175923, |
|
"eval_loss": 1.366058588027954, |
|
"eval_runtime": 1.5787, |
|
"eval_samples_per_second": 303.406, |
|
"eval_steps_per_second": 2.534, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 1.576858952984328e-05, |
|
"loss": 1.3789, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.7071270105379922, |
|
"eval_loss": 1.358585000038147, |
|
"eval_runtime": 1.5848, |
|
"eval_samples_per_second": 302.249, |
|
"eval_steps_per_second": 2.524, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 1.5470656885628543e-05, |
|
"loss": 1.3789, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.7049299719887955, |
|
"eval_loss": 1.3643481731414795, |
|
"eval_runtime": 1.5702, |
|
"eval_samples_per_second": 305.048, |
|
"eval_steps_per_second": 2.547, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 1.5172724241413804e-05, |
|
"loss": 1.3782, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.7037224045555275, |
|
"eval_loss": 1.3715710639953613, |
|
"eval_runtime": 1.5697, |
|
"eval_samples_per_second": 305.149, |
|
"eval_steps_per_second": 2.548, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 1.4874958319439813e-05, |
|
"loss": 1.3763, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.70765141977899, |
|
"eval_loss": 1.3476457595825195, |
|
"eval_runtime": 1.5964, |
|
"eval_samples_per_second": 300.046, |
|
"eval_steps_per_second": 2.506, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 1.4577192397465823e-05, |
|
"loss": 1.3762, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.7066520959426138, |
|
"eval_loss": 1.3518937826156616, |
|
"eval_runtime": 1.6463, |
|
"eval_samples_per_second": 290.954, |
|
"eval_steps_per_second": 2.43, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 1.427942647549183e-05, |
|
"loss": 1.3741, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.7027226737907122, |
|
"eval_loss": 1.3735955953598022, |
|
"eval_runtime": 1.5849, |
|
"eval_samples_per_second": 302.235, |
|
"eval_steps_per_second": 2.524, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 1.398166055351784e-05, |
|
"loss": 1.3741, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.7072822864813468, |
|
"eval_loss": 1.339490532875061, |
|
"eval_runtime": 1.5667, |
|
"eval_samples_per_second": 305.741, |
|
"eval_steps_per_second": 2.553, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 1.3684061353784596e-05, |
|
"loss": 1.3732, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.7020326111235202, |
|
"eval_loss": 1.3724454641342163, |
|
"eval_runtime": 1.7004, |
|
"eval_samples_per_second": 281.702, |
|
"eval_steps_per_second": 2.352, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 1.3386128709569856e-05, |
|
"loss": 1.3724, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.7025496548809691, |
|
"eval_loss": 1.37235689163208, |
|
"eval_runtime": 1.5713, |
|
"eval_samples_per_second": 304.85, |
|
"eval_steps_per_second": 2.546, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 1.3088362787595865e-05, |
|
"loss": 1.3715, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.7083146445391343, |
|
"eval_loss": 1.3571741580963135, |
|
"eval_runtime": 1.5874, |
|
"eval_samples_per_second": 301.751, |
|
"eval_steps_per_second": 2.52, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 1.2790430143381127e-05, |
|
"loss": 1.3713, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.7100663560638147, |
|
"eval_loss": 1.3430290222167969, |
|
"eval_runtime": 1.5682, |
|
"eval_samples_per_second": 305.439, |
|
"eval_steps_per_second": 2.551, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 1.2492497499166388e-05, |
|
"loss": 1.369, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.7103282312449283, |
|
"eval_loss": 1.3457125425338745, |
|
"eval_runtime": 1.7081, |
|
"eval_samples_per_second": 280.423, |
|
"eval_steps_per_second": 2.342, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 1.2194731577192398e-05, |
|
"loss": 1.3687, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.7112089514568553, |
|
"eval_loss": 1.3456733226776123, |
|
"eval_runtime": 1.5857, |
|
"eval_samples_per_second": 302.075, |
|
"eval_steps_per_second": 2.523, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 1.1896965655218407e-05, |
|
"loss": 1.3698, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.7024267451069125, |
|
"eval_loss": 1.3649426698684692, |
|
"eval_runtime": 1.5737, |
|
"eval_samples_per_second": 304.386, |
|
"eval_steps_per_second": 2.542, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 1.1599199733244414e-05, |
|
"loss": 1.3683, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.7105218474077428, |
|
"eval_loss": 1.3422349691390991, |
|
"eval_runtime": 1.5749, |
|
"eval_samples_per_second": 304.149, |
|
"eval_steps_per_second": 2.54, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 1.1301433811270425e-05, |
|
"loss": 1.3668, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.7058147358977235, |
|
"eval_loss": 1.3530675172805786, |
|
"eval_runtime": 1.5757, |
|
"eval_samples_per_second": 303.997, |
|
"eval_steps_per_second": 2.539, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 1.1003667889296433e-05, |
|
"loss": 1.3675, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.7058542882801976, |
|
"eval_loss": 1.3607447147369385, |
|
"eval_runtime": 1.5807, |
|
"eval_samples_per_second": 303.038, |
|
"eval_steps_per_second": 2.531, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 1.0705901967322442e-05, |
|
"loss": 1.3661, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.7062627163521837, |
|
"eval_loss": 1.3552587032318115, |
|
"eval_runtime": 1.5753, |
|
"eval_samples_per_second": 304.064, |
|
"eval_steps_per_second": 2.539, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 1.040813604534845e-05, |
|
"loss": 1.3642, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.7053641566603075, |
|
"eval_loss": 1.3540754318237305, |
|
"eval_runtime": 1.5925, |
|
"eval_samples_per_second": 300.778, |
|
"eval_steps_per_second": 2.512, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 1.0110203401133713e-05, |
|
"loss": 1.364, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.7079530276046304, |
|
"eval_loss": 1.3564157485961914, |
|
"eval_runtime": 1.7195, |
|
"eval_samples_per_second": 278.562, |
|
"eval_steps_per_second": 2.326, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 9.81243747915972e-06, |
|
"loss": 1.3635, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.7055403556771546, |
|
"eval_loss": 1.3633564710617065, |
|
"eval_runtime": 1.5788, |
|
"eval_samples_per_second": 303.389, |
|
"eval_steps_per_second": 2.534, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 9.51467155718573e-06, |
|
"loss": 1.3621, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.7042685476900772, |
|
"eval_loss": 1.3689273595809937, |
|
"eval_runtime": 1.5862, |
|
"eval_samples_per_second": 301.987, |
|
"eval_steps_per_second": 2.522, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 9.217072357452485e-06, |
|
"loss": 1.3623, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.7086510057673372, |
|
"eval_loss": 1.3668452501296997, |
|
"eval_runtime": 1.7126, |
|
"eval_samples_per_second": 279.697, |
|
"eval_steps_per_second": 2.336, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 8.919139713237746e-06, |
|
"loss": 1.3614, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.7061014196511074, |
|
"eval_loss": 1.3534598350524902, |
|
"eval_runtime": 1.5838, |
|
"eval_samples_per_second": 302.445, |
|
"eval_steps_per_second": 2.526, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 8.621373791263756e-06, |
|
"loss": 1.3614, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.7085629563319998, |
|
"eval_loss": 1.358819603919983, |
|
"eval_runtime": 1.606, |
|
"eval_samples_per_second": 298.26, |
|
"eval_steps_per_second": 2.491, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 8.323607869289763e-06, |
|
"loss": 1.3588, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.7090991475744103, |
|
"eval_loss": 1.3471907377243042, |
|
"eval_runtime": 1.5823, |
|
"eval_samples_per_second": 302.722, |
|
"eval_steps_per_second": 2.528, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 8.025841947315772e-06, |
|
"loss": 1.3598, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.7084796225037217, |
|
"eval_loss": 1.3424988985061646, |
|
"eval_runtime": 1.5825, |
|
"eval_samples_per_second": 302.692, |
|
"eval_steps_per_second": 2.528, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 7.728076025341782e-06, |
|
"loss": 1.3601, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.7098971286457155, |
|
"eval_loss": 1.3533294200897217, |
|
"eval_runtime": 1.7592, |
|
"eval_samples_per_second": 272.276, |
|
"eval_steps_per_second": 2.274, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 7.430143381127043e-06, |
|
"loss": 1.3585, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.711209661674058, |
|
"eval_loss": 1.329834222793579, |
|
"eval_runtime": 1.5914, |
|
"eval_samples_per_second": 300.99, |
|
"eval_steps_per_second": 2.513, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 7.132377459153051e-06, |
|
"loss": 1.3586, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.703638680447402, |
|
"eval_loss": 1.3582900762557983, |
|
"eval_runtime": 1.5993, |
|
"eval_samples_per_second": 299.511, |
|
"eval_steps_per_second": 2.501, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 6.83461153717906e-06, |
|
"loss": 1.3583, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.7081236721458124, |
|
"eval_loss": 1.3454816341400146, |
|
"eval_runtime": 1.6765, |
|
"eval_samples_per_second": 285.71, |
|
"eval_steps_per_second": 2.386, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 6.536845615205069e-06, |
|
"loss": 1.3567, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.7044657007018338, |
|
"eval_loss": 1.367392659187317, |
|
"eval_runtime": 1.5885, |
|
"eval_samples_per_second": 301.545, |
|
"eval_steps_per_second": 2.518, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 6.239079693231077e-06, |
|
"loss": 1.3561, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.7055519671993036, |
|
"eval_loss": 1.370457649230957, |
|
"eval_runtime": 1.577, |
|
"eval_samples_per_second": 303.746, |
|
"eval_steps_per_second": 2.537, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 5.941147049016339e-06, |
|
"loss": 1.3552, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.7079978529253892, |
|
"eval_loss": 1.3506109714508057, |
|
"eval_runtime": 1.5817, |
|
"eval_samples_per_second": 302.839, |
|
"eval_steps_per_second": 2.529, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 5.6432144048016e-06, |
|
"loss": 1.3547, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.7119885902849632, |
|
"eval_loss": 1.3456552028656006, |
|
"eval_runtime": 1.7734, |
|
"eval_samples_per_second": 270.099, |
|
"eval_steps_per_second": 2.256, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 5.34544848282761e-06, |
|
"loss": 1.3538, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.7099597756501048, |
|
"eval_loss": 1.3244553804397583, |
|
"eval_runtime": 1.5873, |
|
"eval_samples_per_second": 301.767, |
|
"eval_steps_per_second": 2.52, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 5.047682560853618e-06, |
|
"loss": 1.3543, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.7109617665330096, |
|
"eval_loss": 1.3277746438980103, |
|
"eval_runtime": 1.5987, |
|
"eval_samples_per_second": 299.621, |
|
"eval_steps_per_second": 2.502, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 4.749916638879627e-06, |
|
"loss": 1.3549, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.7103175715446927, |
|
"eval_loss": 1.334486961364746, |
|
"eval_runtime": 1.6577, |
|
"eval_samples_per_second": 288.956, |
|
"eval_steps_per_second": 2.413, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 4.452150716905636e-06, |
|
"loss": 1.3528, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.7089188813134827, |
|
"eval_loss": 1.3335890769958496, |
|
"eval_runtime": 1.5839, |
|
"eval_samples_per_second": 302.417, |
|
"eval_steps_per_second": 2.525, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 4.154551517172391e-06, |
|
"loss": 1.3528, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.7074926352064921, |
|
"eval_loss": 1.3620299100875854, |
|
"eval_runtime": 1.6136, |
|
"eval_samples_per_second": 296.847, |
|
"eval_steps_per_second": 2.479, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 3.856618872957652e-06, |
|
"loss": 1.3531, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.7119889627210272, |
|
"eval_loss": 1.3383039236068726, |
|
"eval_runtime": 1.6118, |
|
"eval_samples_per_second": 297.175, |
|
"eval_steps_per_second": 2.482, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 3.558852950983661e-06, |
|
"loss": 1.3525, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.709147771696638, |
|
"eval_loss": 1.3376810550689697, |
|
"eval_runtime": 1.6014, |
|
"eval_samples_per_second": 299.106, |
|
"eval_steps_per_second": 2.498, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 3.2609203067689235e-06, |
|
"loss": 1.3509, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.7054241768129798, |
|
"eval_loss": 1.363862156867981, |
|
"eval_runtime": 1.5926, |
|
"eval_samples_per_second": 300.762, |
|
"eval_steps_per_second": 2.512, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 2.962987662554185e-06, |
|
"loss": 1.3502, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.7090706112196483, |
|
"eval_loss": 1.345644235610962, |
|
"eval_runtime": 1.7541, |
|
"eval_samples_per_second": 273.075, |
|
"eval_steps_per_second": 2.28, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 2.6653884628209403e-06, |
|
"loss": 1.351, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.7110125231650475, |
|
"eval_loss": 1.3358895778656006, |
|
"eval_runtime": 1.7083, |
|
"eval_samples_per_second": 280.394, |
|
"eval_steps_per_second": 2.341, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.3674558186062022e-06, |
|
"loss": 1.3493, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.7128785320274682, |
|
"eval_loss": 1.3361459970474243, |
|
"eval_runtime": 1.5991, |
|
"eval_samples_per_second": 299.536, |
|
"eval_steps_per_second": 2.501, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 2.069523174391464e-06, |
|
"loss": 1.3497, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.7090949997187693, |
|
"eval_loss": 1.3350205421447754, |
|
"eval_runtime": 1.5987, |
|
"eval_samples_per_second": 299.61, |
|
"eval_steps_per_second": 2.502, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 1.7717572524174725e-06, |
|
"loss": 1.3484, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.7074801387490209, |
|
"eval_loss": 1.3582346439361572, |
|
"eval_runtime": 1.5871, |
|
"eval_samples_per_second": 301.812, |
|
"eval_steps_per_second": 2.52, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 1.4739913304434811e-06, |
|
"loss": 1.35, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.7099812203940914, |
|
"eval_loss": 1.3440545797348022, |
|
"eval_runtime": 1.5859, |
|
"eval_samples_per_second": 302.036, |
|
"eval_steps_per_second": 2.522, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 1.17622540846949e-06, |
|
"loss": 1.3485, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.709328337341312, |
|
"eval_loss": 1.322691559791565, |
|
"eval_runtime": 1.6662, |
|
"eval_samples_per_second": 287.479, |
|
"eval_steps_per_second": 2.401, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 8.784594864954986e-07, |
|
"loss": 1.3485, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.711818259167872, |
|
"eval_loss": 1.3385770320892334, |
|
"eval_runtime": 1.6184, |
|
"eval_samples_per_second": 295.967, |
|
"eval_steps_per_second": 2.472, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 5.806935645215072e-07, |
|
"loss": 1.3483, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.7086703561799347, |
|
"eval_loss": 1.347002625465393, |
|
"eval_runtime": 1.5811, |
|
"eval_samples_per_second": 302.945, |
|
"eval_steps_per_second": 2.53, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 2.829276425475159e-07, |
|
"loss": 1.3481, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.7087042253521126, |
|
"eval_loss": 1.3404722213745117, |
|
"eval_runtime": 1.6093, |
|
"eval_samples_per_second": 297.645, |
|
"eval_steps_per_second": 2.486, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"learning_rate": 2.1007002334111373e-08, |
|
"loss": 1.3465, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"eval_accuracy": 0.7062244840892568, |
|
"eval_loss": 1.3382107019424438, |
|
"eval_runtime": 1.6088, |
|
"eval_samples_per_second": 297.737, |
|
"eval_steps_per_second": 2.486, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"step": 300000, |
|
"total_flos": 5.088223250635293e+18, |
|
"train_loss": 1.4402713134765626, |
|
"train_runtime": 110171.7858, |
|
"train_samples_per_second": 348.547, |
|
"train_steps_per_second": 2.723 |
|
} |
|
], |
|
"max_steps": 300000, |
|
"num_train_epochs": 168, |
|
"total_flos": 5.088223250635293e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|