|
{ |
|
"best_metric": 0.7719072164948454, |
|
"best_model_checkpoint": "roberta-large-movies/checkpoint-72500", |
|
"epoch": 30.0, |
|
"global_step": 83910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.970504111548088e-05, |
|
"loss": 1.7698, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.6738421395955643, |
|
"eval_loss": 1.6167851686477661, |
|
"eval_runtime": 0.8246, |
|
"eval_samples_per_second": 606.37, |
|
"eval_steps_per_second": 38.808, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.94082946013586e-05, |
|
"loss": 1.7761, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.6829508196721311, |
|
"eval_loss": 1.6522468328475952, |
|
"eval_runtime": 0.7873, |
|
"eval_samples_per_second": 635.049, |
|
"eval_steps_per_second": 40.643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9110356334167565e-05, |
|
"loss": 1.7626, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.6660117878192534, |
|
"eval_loss": 1.6534239053726196, |
|
"eval_runtime": 0.7869, |
|
"eval_samples_per_second": 635.425, |
|
"eval_steps_per_second": 40.667, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.8812418066976524e-05, |
|
"loss": 1.7602, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.6787299419597133, |
|
"eval_loss": 1.6575504541397095, |
|
"eval_runtime": 0.7882, |
|
"eval_samples_per_second": 634.385, |
|
"eval_steps_per_second": 40.601, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.851447979978549e-05, |
|
"loss": 1.7587, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.6772697150430749, |
|
"eval_loss": 1.6266298294067383, |
|
"eval_runtime": 0.7893, |
|
"eval_samples_per_second": 633.509, |
|
"eval_steps_per_second": 40.545, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.821654153259445e-05, |
|
"loss": 1.7047, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.6851971557853911, |
|
"eval_loss": 1.605985164642334, |
|
"eval_runtime": 0.8181, |
|
"eval_samples_per_second": 611.179, |
|
"eval_steps_per_second": 39.115, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.791860326540341e-05, |
|
"loss": 1.6782, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.6906354515050167, |
|
"eval_loss": 1.599035382270813, |
|
"eval_runtime": 0.8184, |
|
"eval_samples_per_second": 610.967, |
|
"eval_steps_per_second": 39.102, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.7620664998212375e-05, |
|
"loss": 1.6733, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.6967426710097719, |
|
"eval_loss": 1.5377483367919922, |
|
"eval_runtime": 0.819, |
|
"eval_samples_per_second": 610.521, |
|
"eval_steps_per_second": 39.073, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.7322726731021334e-05, |
|
"loss": 1.6664, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.6746607762701168, |
|
"eval_loss": 1.6434643268585205, |
|
"eval_runtime": 0.7966, |
|
"eval_samples_per_second": 627.631, |
|
"eval_steps_per_second": 40.168, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.70247884638303e-05, |
|
"loss": 1.6719, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.6907181571815718, |
|
"eval_loss": 1.483905553817749, |
|
"eval_runtime": 0.7989, |
|
"eval_samples_per_second": 625.841, |
|
"eval_steps_per_second": 40.054, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.672685019663926e-05, |
|
"loss": 1.6502, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.6896661367249602, |
|
"eval_loss": 1.535127878189087, |
|
"eval_runtime": 0.823, |
|
"eval_samples_per_second": 607.558, |
|
"eval_steps_per_second": 38.884, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.642891192944822e-05, |
|
"loss": 1.6233, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_accuracy": 0.6763219939373526, |
|
"eval_loss": 1.6817570924758911, |
|
"eval_runtime": 0.7881, |
|
"eval_samples_per_second": 634.403, |
|
"eval_steps_per_second": 40.602, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.6130973662257184e-05, |
|
"loss": 1.6127, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.685335059889932, |
|
"eval_loss": 1.5865211486816406, |
|
"eval_runtime": 0.787, |
|
"eval_samples_per_second": 635.291, |
|
"eval_steps_per_second": 40.659, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.5833035395066143e-05, |
|
"loss": 1.6274, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.7003633961017509, |
|
"eval_loss": 1.5004233121871948, |
|
"eval_runtime": 0.8009, |
|
"eval_samples_per_second": 624.318, |
|
"eval_steps_per_second": 39.956, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 4.553628888094387e-05, |
|
"loss": 1.601, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.6929970129439097, |
|
"eval_loss": 1.452188491821289, |
|
"eval_runtime": 0.7898, |
|
"eval_samples_per_second": 633.056, |
|
"eval_steps_per_second": 40.516, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.523835061375284e-05, |
|
"loss": 1.6123, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy": 0.689419795221843, |
|
"eval_loss": 1.5370689630508423, |
|
"eval_runtime": 0.8546, |
|
"eval_samples_per_second": 585.05, |
|
"eval_steps_per_second": 37.443, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.4940412346561796e-05, |
|
"loss": 1.6074, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.6952157912345266, |
|
"eval_loss": 1.5342369079589844, |
|
"eval_runtime": 0.8214, |
|
"eval_samples_per_second": 608.68, |
|
"eval_steps_per_second": 38.956, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.4642474079370755e-05, |
|
"loss": 1.563, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.6875834445927904, |
|
"eval_loss": 1.568178415298462, |
|
"eval_runtime": 0.8488, |
|
"eval_samples_per_second": 589.06, |
|
"eval_steps_per_second": 37.7, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.4344535812179714e-05, |
|
"loss": 1.5746, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.6957663275352806, |
|
"eval_loss": 1.5704632997512817, |
|
"eval_runtime": 0.852, |
|
"eval_samples_per_second": 586.84, |
|
"eval_steps_per_second": 37.558, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.404778929805745e-05, |
|
"loss": 1.5539, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_accuracy": 0.7040711597673623, |
|
"eval_loss": 1.4710707664489746, |
|
"eval_runtime": 0.85, |
|
"eval_samples_per_second": 588.248, |
|
"eval_steps_per_second": 37.648, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.374985103086641e-05, |
|
"loss": 1.578, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.6888888888888889, |
|
"eval_loss": 1.5465725660324097, |
|
"eval_runtime": 0.8902, |
|
"eval_samples_per_second": 561.645, |
|
"eval_steps_per_second": 35.945, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.345191276367537e-05, |
|
"loss": 1.5492, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_accuracy": 0.6968894771674388, |
|
"eval_loss": 1.4628891944885254, |
|
"eval_runtime": 0.8368, |
|
"eval_samples_per_second": 597.487, |
|
"eval_steps_per_second": 38.239, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.3153974496484326e-05, |
|
"loss": 1.5291, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_accuracy": 0.7200132538104705, |
|
"eval_loss": 1.4264894723892212, |
|
"eval_runtime": 0.8798, |
|
"eval_samples_per_second": 568.319, |
|
"eval_steps_per_second": 36.372, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.285603622929329e-05, |
|
"loss": 1.5079, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_accuracy": 0.6966074313408723, |
|
"eval_loss": 1.5052707195281982, |
|
"eval_runtime": 0.8186, |
|
"eval_samples_per_second": 610.796, |
|
"eval_steps_per_second": 39.091, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.255809796210226e-05, |
|
"loss": 1.5283, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_accuracy": 0.6902654867256637, |
|
"eval_loss": 1.5257039070129395, |
|
"eval_runtime": 0.8002, |
|
"eval_samples_per_second": 624.861, |
|
"eval_steps_per_second": 39.991, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 4.226015969491122e-05, |
|
"loss": 1.5141, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_accuracy": 0.6949898442789438, |
|
"eval_loss": 1.5063292980194092, |
|
"eval_runtime": 0.8654, |
|
"eval_samples_per_second": 577.759, |
|
"eval_steps_per_second": 36.977, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 4.1962221427720176e-05, |
|
"loss": 1.4979, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.6955945677376615, |
|
"eval_loss": 1.5636450052261353, |
|
"eval_runtime": 0.8149, |
|
"eval_samples_per_second": 613.582, |
|
"eval_steps_per_second": 39.269, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.1664283160529136e-05, |
|
"loss": 1.5294, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy": 0.6835193696651346, |
|
"eval_loss": 1.587847113609314, |
|
"eval_runtime": 0.8296, |
|
"eval_samples_per_second": 602.733, |
|
"eval_steps_per_second": 38.575, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 4.13663448933381e-05, |
|
"loss": 1.4641, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"eval_accuracy": 0.6962067807989258, |
|
"eval_loss": 1.5574804544448853, |
|
"eval_runtime": 0.81, |
|
"eval_samples_per_second": 617.287, |
|
"eval_steps_per_second": 39.506, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 4.106840662614707e-05, |
|
"loss": 1.4754, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_accuracy": 0.7006847081838931, |
|
"eval_loss": 1.4779187440872192, |
|
"eval_runtime": 0.8312, |
|
"eval_samples_per_second": 601.557, |
|
"eval_steps_per_second": 38.5, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 4.077046835895603e-05, |
|
"loss": 1.4696, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_accuracy": 0.6965271015903928, |
|
"eval_loss": 1.451996922492981, |
|
"eval_runtime": 0.7909, |
|
"eval_samples_per_second": 632.19, |
|
"eval_steps_per_second": 40.46, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.0472530091764986e-05, |
|
"loss": 1.4655, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"eval_accuracy": 0.683049147442327, |
|
"eval_loss": 1.6320295333862305, |
|
"eval_runtime": 0.8309, |
|
"eval_samples_per_second": 601.76, |
|
"eval_steps_per_second": 38.513, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 4.0174591824573945e-05, |
|
"loss": 1.4792, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_accuracy": 0.7134165866154338, |
|
"eval_loss": 1.415226697921753, |
|
"eval_runtime": 0.8575, |
|
"eval_samples_per_second": 583.097, |
|
"eval_steps_per_second": 37.318, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.98772494339173e-05, |
|
"loss": 1.4379, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_accuracy": 0.7041935483870968, |
|
"eval_loss": 1.4900156259536743, |
|
"eval_runtime": 0.8413, |
|
"eval_samples_per_second": 594.352, |
|
"eval_steps_per_second": 38.039, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 3.957931116672626e-05, |
|
"loss": 1.4281, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_accuracy": 0.6989864864864865, |
|
"eval_loss": 1.5407416820526123, |
|
"eval_runtime": 0.8677, |
|
"eval_samples_per_second": 576.232, |
|
"eval_steps_per_second": 36.879, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.928137289953522e-05, |
|
"loss": 1.436, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"eval_accuracy": 0.6914175506268081, |
|
"eval_loss": 1.534258246421814, |
|
"eval_runtime": 0.843, |
|
"eval_samples_per_second": 593.143, |
|
"eval_steps_per_second": 37.961, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 3.8983434632344176e-05, |
|
"loss": 1.4342, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"eval_accuracy": 0.7023696682464455, |
|
"eval_loss": 1.5323561429977417, |
|
"eval_runtime": 0.7874, |
|
"eval_samples_per_second": 635.024, |
|
"eval_steps_per_second": 40.642, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 3.868549636515314e-05, |
|
"loss": 1.4176, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_accuracy": 0.7132913490222075, |
|
"eval_loss": 1.4485751390457153, |
|
"eval_runtime": 0.8567, |
|
"eval_samples_per_second": 583.665, |
|
"eval_steps_per_second": 37.355, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.838755809796211e-05, |
|
"loss": 1.4308, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.7031503734978889, |
|
"eval_loss": 1.4598056077957153, |
|
"eval_runtime": 0.79, |
|
"eval_samples_per_second": 632.872, |
|
"eval_steps_per_second": 40.504, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 3.809021570730545e-05, |
|
"loss": 1.4014, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_accuracy": 0.6938435940099834, |
|
"eval_loss": 1.575023889541626, |
|
"eval_runtime": 0.8292, |
|
"eval_samples_per_second": 603.024, |
|
"eval_steps_per_second": 38.594, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 3.779227744011441e-05, |
|
"loss": 1.3661, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_accuracy": 0.6985221674876847, |
|
"eval_loss": 1.5403505563735962, |
|
"eval_runtime": 0.8319, |
|
"eval_samples_per_second": 601.063, |
|
"eval_steps_per_second": 38.468, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 3.7494935049457754e-05, |
|
"loss": 1.3857, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_accuracy": 0.7037155669442665, |
|
"eval_loss": 1.4692307710647583, |
|
"eval_runtime": 0.8177, |
|
"eval_samples_per_second": 611.5, |
|
"eval_steps_per_second": 39.136, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 3.719699678226672e-05, |
|
"loss": 1.3846, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_accuracy": 0.6941445861956166, |
|
"eval_loss": 1.5511342287063599, |
|
"eval_runtime": 0.7898, |
|
"eval_samples_per_second": 633.076, |
|
"eval_steps_per_second": 40.517, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 3.689905851507568e-05, |
|
"loss": 1.3867, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_accuracy": 0.6925124792013311, |
|
"eval_loss": 1.5321439504623413, |
|
"eval_runtime": 0.8379, |
|
"eval_samples_per_second": 596.713, |
|
"eval_steps_per_second": 38.19, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 3.660112024788464e-05, |
|
"loss": 1.3658, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"eval_accuracy": 0.7020917678812416, |
|
"eval_loss": 1.5499885082244873, |
|
"eval_runtime": 0.8209, |
|
"eval_samples_per_second": 609.075, |
|
"eval_steps_per_second": 38.981, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 3.6303181980693604e-05, |
|
"loss": 1.3406, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_accuracy": 0.6959503592423253, |
|
"eval_loss": 1.523918628692627, |
|
"eval_runtime": 0.8298, |
|
"eval_samples_per_second": 602.525, |
|
"eval_steps_per_second": 38.562, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.600524371350256e-05, |
|
"loss": 1.3405, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_accuracy": 0.7055256064690026, |
|
"eval_loss": 1.4414023160934448, |
|
"eval_runtime": 0.8516, |
|
"eval_samples_per_second": 587.105, |
|
"eval_steps_per_second": 37.575, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 3.570730544631153e-05, |
|
"loss": 1.3373, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"eval_accuracy": 0.6784238957737527, |
|
"eval_loss": 1.599377155303955, |
|
"eval_runtime": 0.791, |
|
"eval_samples_per_second": 632.109, |
|
"eval_steps_per_second": 40.455, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.540936717912049e-05, |
|
"loss": 1.3527, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_accuracy": 0.6970387243735763, |
|
"eval_loss": 1.5105814933776855, |
|
"eval_runtime": 0.8594, |
|
"eval_samples_per_second": 581.797, |
|
"eval_steps_per_second": 37.235, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 3.511142891192945e-05, |
|
"loss": 1.3436, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_accuracy": 0.7079758500158881, |
|
"eval_loss": 1.471426010131836, |
|
"eval_runtime": 0.8427, |
|
"eval_samples_per_second": 593.355, |
|
"eval_steps_per_second": 37.975, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 3.4813490644738414e-05, |
|
"loss": 1.3069, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_accuracy": 0.6953099376844867, |
|
"eval_loss": 1.4990392923355103, |
|
"eval_runtime": 0.8575, |
|
"eval_samples_per_second": 583.12, |
|
"eval_steps_per_second": 37.32, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.451555237754737e-05, |
|
"loss": 1.2969, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_accuracy": 0.6964285714285714, |
|
"eval_loss": 1.4809668064117432, |
|
"eval_runtime": 0.8312, |
|
"eval_samples_per_second": 601.512, |
|
"eval_steps_per_second": 38.497, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 3.421761411035634e-05, |
|
"loss": 1.3009, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_accuracy": 0.6875602700096431, |
|
"eval_loss": 1.5964903831481934, |
|
"eval_runtime": 0.8752, |
|
"eval_samples_per_second": 571.296, |
|
"eval_steps_per_second": 36.563, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 3.392086759623406e-05, |
|
"loss": 1.3227, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_accuracy": 0.7013662979830839, |
|
"eval_loss": 1.429559588432312, |
|
"eval_runtime": 0.7904, |
|
"eval_samples_per_second": 632.561, |
|
"eval_steps_per_second": 40.484, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.3622929329043025e-05, |
|
"loss": 1.3259, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_accuracy": 0.7189224277831873, |
|
"eval_loss": 1.413652777671814, |
|
"eval_runtime": 0.8134, |
|
"eval_samples_per_second": 614.697, |
|
"eval_steps_per_second": 39.341, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.3324991061851985e-05, |
|
"loss": 1.3131, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"eval_accuracy": 0.7019570099454604, |
|
"eval_loss": 1.534200668334961, |
|
"eval_runtime": 0.8056, |
|
"eval_samples_per_second": 620.653, |
|
"eval_steps_per_second": 39.722, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 3.3027052794660944e-05, |
|
"loss": 1.271, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"eval_accuracy": 0.711340206185567, |
|
"eval_loss": 1.470828890800476, |
|
"eval_runtime": 0.7815, |
|
"eval_samples_per_second": 639.779, |
|
"eval_steps_per_second": 40.946, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 3.272911452746991e-05, |
|
"loss": 1.2684, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"eval_accuracy": 0.7045747422680413, |
|
"eval_loss": 1.4341672658920288, |
|
"eval_runtime": 0.7954, |
|
"eval_samples_per_second": 628.629, |
|
"eval_steps_per_second": 40.232, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 3.2431176260278876e-05, |
|
"loss": 1.2767, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_accuracy": 0.709353000335233, |
|
"eval_loss": 1.4703407287597656, |
|
"eval_runtime": 0.8179, |
|
"eval_samples_per_second": 611.351, |
|
"eval_steps_per_second": 39.126, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 3.2133237993087835e-05, |
|
"loss": 1.2861, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"eval_accuracy": 0.7308937823834197, |
|
"eval_loss": 1.3323109149932861, |
|
"eval_runtime": 0.7855, |
|
"eval_samples_per_second": 636.523, |
|
"eval_steps_per_second": 40.737, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 3.1835299725896794e-05, |
|
"loss": 1.2617, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"eval_accuracy": 0.7003344481605351, |
|
"eval_loss": 1.4562044143676758, |
|
"eval_runtime": 0.7951, |
|
"eval_samples_per_second": 628.826, |
|
"eval_steps_per_second": 40.245, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 3.153736145870575e-05, |
|
"loss": 1.2551, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_accuracy": 0.7169689119170984, |
|
"eval_loss": 1.4361472129821777, |
|
"eval_runtime": 0.8647, |
|
"eval_samples_per_second": 578.22, |
|
"eval_steps_per_second": 37.006, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 3.124001906804911e-05, |
|
"loss": 1.2404, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"eval_accuracy": 0.7034617896799478, |
|
"eval_loss": 1.4536628723144531, |
|
"eval_runtime": 0.7907, |
|
"eval_samples_per_second": 632.325, |
|
"eval_steps_per_second": 40.469, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 3.0942080800858066e-05, |
|
"loss": 1.2562, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"eval_accuracy": 0.7132209980557356, |
|
"eval_loss": 1.4038574695587158, |
|
"eval_runtime": 0.7924, |
|
"eval_samples_per_second": 631.001, |
|
"eval_steps_per_second": 40.384, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 3.0644142533667025e-05, |
|
"loss": 1.2489, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"eval_accuracy": 0.706418918918919, |
|
"eval_loss": 1.4372212886810303, |
|
"eval_runtime": 0.8024, |
|
"eval_samples_per_second": 623.122, |
|
"eval_steps_per_second": 39.88, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 3.0346204266475984e-05, |
|
"loss": 1.2406, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_accuracy": 0.7087442472057857, |
|
"eval_loss": 1.4926137924194336, |
|
"eval_runtime": 0.8525, |
|
"eval_samples_per_second": 586.532, |
|
"eval_steps_per_second": 37.538, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 3.0048265999284947e-05, |
|
"loss": 1.2285, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_accuracy": 0.7152005392652511, |
|
"eval_loss": 1.4080321788787842, |
|
"eval_runtime": 0.8108, |
|
"eval_samples_per_second": 616.703, |
|
"eval_steps_per_second": 39.469, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 2.9750327732093913e-05, |
|
"loss": 1.2213, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"eval_accuracy": 0.7170240415854451, |
|
"eval_loss": 1.403072476387024, |
|
"eval_runtime": 0.8459, |
|
"eval_samples_per_second": 591.089, |
|
"eval_steps_per_second": 37.83, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 2.9452389464902875e-05, |
|
"loss": 1.1998, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_accuracy": 0.7222584856396866, |
|
"eval_loss": 1.3541438579559326, |
|
"eval_runtime": 0.7909, |
|
"eval_samples_per_second": 632.16, |
|
"eval_steps_per_second": 40.458, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 2.9154451197711835e-05, |
|
"loss": 1.2184, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"eval_accuracy": 0.7308441558441559, |
|
"eval_loss": 1.3629957437515259, |
|
"eval_runtime": 0.8716, |
|
"eval_samples_per_second": 573.677, |
|
"eval_steps_per_second": 36.715, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 2.8856512930520797e-05, |
|
"loss": 1.2195, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"eval_accuracy": 0.7281362594169669, |
|
"eval_loss": 1.312456488609314, |
|
"eval_runtime": 0.852, |
|
"eval_samples_per_second": 586.847, |
|
"eval_steps_per_second": 37.558, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 2.8558574663329756e-05, |
|
"loss": 1.2178, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"eval_accuracy": 0.7119236883942767, |
|
"eval_loss": 1.4257023334503174, |
|
"eval_runtime": 0.8597, |
|
"eval_samples_per_second": 581.571, |
|
"eval_steps_per_second": 37.221, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 2.8260636396138722e-05, |
|
"loss": 1.1918, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"eval_accuracy": 0.7152686762778506, |
|
"eval_loss": 1.4108035564422607, |
|
"eval_runtime": 0.9192, |
|
"eval_samples_per_second": 543.96, |
|
"eval_steps_per_second": 34.813, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 2.7963294005482066e-05, |
|
"loss": 1.1664, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"eval_accuracy": 0.7226588081204977, |
|
"eval_loss": 1.3577048778533936, |
|
"eval_runtime": 0.7887, |
|
"eval_samples_per_second": 633.948, |
|
"eval_steps_per_second": 40.573, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 2.7665355738291028e-05, |
|
"loss": 1.1754, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"eval_accuracy": 0.720593191776205, |
|
"eval_loss": 1.377700924873352, |
|
"eval_runtime": 0.8445, |
|
"eval_samples_per_second": 592.06, |
|
"eval_steps_per_second": 37.892, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 2.7367417471099987e-05, |
|
"loss": 1.1855, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"eval_accuracy": 0.7354008578027054, |
|
"eval_loss": 1.350059151649475, |
|
"eval_runtime": 0.8109, |
|
"eval_samples_per_second": 616.607, |
|
"eval_steps_per_second": 39.463, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 2.7070075080443334e-05, |
|
"loss": 1.1644, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_accuracy": 0.7206685953069752, |
|
"eval_loss": 1.374656081199646, |
|
"eval_runtime": 0.8397, |
|
"eval_samples_per_second": 595.482, |
|
"eval_steps_per_second": 38.111, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 2.6772136813252297e-05, |
|
"loss": 1.1709, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.7183739837398374, |
|
"eval_loss": 1.3703839778900146, |
|
"eval_runtime": 0.8025, |
|
"eval_samples_per_second": 623.038, |
|
"eval_steps_per_second": 39.874, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 2.6474198546061256e-05, |
|
"loss": 1.1613, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"eval_accuracy": 0.7246875, |
|
"eval_loss": 1.4306718111038208, |
|
"eval_runtime": 0.8499, |
|
"eval_samples_per_second": 588.275, |
|
"eval_steps_per_second": 37.65, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 2.617626027887022e-05, |
|
"loss": 1.1443, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"eval_accuracy": 0.7220978573712824, |
|
"eval_loss": 1.3189983367919922, |
|
"eval_runtime": 0.7903, |
|
"eval_samples_per_second": 632.651, |
|
"eval_steps_per_second": 40.49, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.5878322011679178e-05, |
|
"loss": 1.1356, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_accuracy": 0.7331329325317302, |
|
"eval_loss": 1.3287793397903442, |
|
"eval_runtime": 0.7921, |
|
"eval_samples_per_second": 631.257, |
|
"eval_steps_per_second": 40.4, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 2.5580383744488147e-05, |
|
"loss": 1.1493, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"eval_accuracy": 0.7240227196792516, |
|
"eval_loss": 1.3504801988601685, |
|
"eval_runtime": 0.8432, |
|
"eval_samples_per_second": 592.975, |
|
"eval_steps_per_second": 37.95, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 2.5283041353831487e-05, |
|
"loss": 1.1417, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"eval_accuracy": 0.7320369149637442, |
|
"eval_loss": 1.31459379196167, |
|
"eval_runtime": 0.8272, |
|
"eval_samples_per_second": 604.463, |
|
"eval_steps_per_second": 38.686, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 2.498569896317483e-05, |
|
"loss": 1.1349, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"eval_accuracy": 0.7333114107201578, |
|
"eval_loss": 1.3545522689819336, |
|
"eval_runtime": 0.8634, |
|
"eval_samples_per_second": 579.106, |
|
"eval_steps_per_second": 37.063, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 2.4687760695983793e-05, |
|
"loss": 1.1169, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"eval_accuracy": 0.7246922024623803, |
|
"eval_loss": 1.37086021900177, |
|
"eval_runtime": 0.8611, |
|
"eval_samples_per_second": 580.685, |
|
"eval_steps_per_second": 37.164, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 2.4390418305327136e-05, |
|
"loss": 1.1187, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"eval_accuracy": 0.7217795484727756, |
|
"eval_loss": 1.4242717027664185, |
|
"eval_runtime": 0.8265, |
|
"eval_samples_per_second": 604.985, |
|
"eval_steps_per_second": 38.719, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 2.4092480038136102e-05, |
|
"loss": 1.118, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"eval_accuracy": 0.7264245251582806, |
|
"eval_loss": 1.3835431337356567, |
|
"eval_runtime": 0.8374, |
|
"eval_samples_per_second": 597.064, |
|
"eval_steps_per_second": 38.212, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 2.379454177094506e-05, |
|
"loss": 1.1165, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"eval_accuracy": 0.7253818654533637, |
|
"eval_loss": 1.3239895105361938, |
|
"eval_runtime": 0.8499, |
|
"eval_samples_per_second": 588.29, |
|
"eval_steps_per_second": 37.651, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 2.3496603503754024e-05, |
|
"loss": 1.114, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"eval_accuracy": 0.7382113821138211, |
|
"eval_loss": 1.3263858556747437, |
|
"eval_runtime": 0.8424, |
|
"eval_samples_per_second": 593.546, |
|
"eval_steps_per_second": 37.987, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 2.3198665236562986e-05, |
|
"loss": 1.105, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"eval_accuracy": 0.7333548804137039, |
|
"eval_loss": 1.3213739395141602, |
|
"eval_runtime": 0.8677, |
|
"eval_samples_per_second": 576.224, |
|
"eval_steps_per_second": 36.878, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 2.2900726969371946e-05, |
|
"loss": 1.0924, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_accuracy": 0.7282392026578073, |
|
"eval_loss": 1.384667992591858, |
|
"eval_runtime": 0.9421, |
|
"eval_samples_per_second": 530.704, |
|
"eval_steps_per_second": 33.965, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 2.260278870218091e-05, |
|
"loss": 1.0915, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"eval_accuracy": 0.7317073170731707, |
|
"eval_loss": 1.3603721857070923, |
|
"eval_runtime": 0.7951, |
|
"eval_samples_per_second": 628.874, |
|
"eval_steps_per_second": 40.248, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 2.230485043498987e-05, |
|
"loss": 1.0968, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"eval_accuracy": 0.7319177173191772, |
|
"eval_loss": 1.3539705276489258, |
|
"eval_runtime": 0.8815, |
|
"eval_samples_per_second": 567.187, |
|
"eval_steps_per_second": 36.3, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 2.2006912167798833e-05, |
|
"loss": 1.0772, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.7306332369013179, |
|
"eval_loss": 1.2475004196166992, |
|
"eval_runtime": 0.8301, |
|
"eval_samples_per_second": 602.308, |
|
"eval_steps_per_second": 38.548, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 2.1708973900607796e-05, |
|
"loss": 1.0975, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_accuracy": 0.7448207826372903, |
|
"eval_loss": 1.2635700702667236, |
|
"eval_runtime": 0.8269, |
|
"eval_samples_per_second": 604.655, |
|
"eval_steps_per_second": 38.698, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 2.1411035633416755e-05, |
|
"loss": 1.0708, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_accuracy": 0.7182085648904871, |
|
"eval_loss": 1.4056382179260254, |
|
"eval_runtime": 0.8973, |
|
"eval_samples_per_second": 557.236, |
|
"eval_steps_per_second": 35.663, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"learning_rate": 2.111309736622572e-05, |
|
"loss": 1.0654, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"eval_accuracy": 0.727630285152409, |
|
"eval_loss": 1.3769292831420898, |
|
"eval_runtime": 0.8377, |
|
"eval_samples_per_second": 596.886, |
|
"eval_steps_per_second": 38.201, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.081515909903468e-05, |
|
"loss": 1.0676, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_accuracy": 0.7224234441883438, |
|
"eval_loss": 1.33571457862854, |
|
"eval_runtime": 0.7909, |
|
"eval_samples_per_second": 632.166, |
|
"eval_steps_per_second": 40.459, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 2.0517220831843643e-05, |
|
"loss": 1.0507, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_accuracy": 0.712369109947644, |
|
"eval_loss": 1.4087713956832886, |
|
"eval_runtime": 0.7955, |
|
"eval_samples_per_second": 628.504, |
|
"eval_steps_per_second": 40.224, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"learning_rate": 2.0219282564652605e-05, |
|
"loss": 1.0424, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 17.88, |
|
"eval_accuracy": 0.7314667515112949, |
|
"eval_loss": 1.3146371841430664, |
|
"eval_runtime": 0.7881, |
|
"eval_samples_per_second": 634.428, |
|
"eval_steps_per_second": 40.603, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 1.9921344297461568e-05, |
|
"loss": 1.0524, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_accuracy": 0.7393395319012503, |
|
"eval_loss": 1.28960382938385, |
|
"eval_runtime": 0.8581, |
|
"eval_samples_per_second": 582.683, |
|
"eval_steps_per_second": 37.292, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 1.962340603027053e-05, |
|
"loss": 1.0349, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"eval_accuracy": 0.7191558441558441, |
|
"eval_loss": 1.3986730575561523, |
|
"eval_runtime": 0.7904, |
|
"eval_samples_per_second": 632.599, |
|
"eval_steps_per_second": 40.486, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 1.932546776307949e-05, |
|
"loss": 1.0217, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"eval_accuracy": 0.7380645161290322, |
|
"eval_loss": 1.2937612533569336, |
|
"eval_runtime": 0.8575, |
|
"eval_samples_per_second": 583.089, |
|
"eval_steps_per_second": 37.318, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 1.9028125372422833e-05, |
|
"loss": 1.0238, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"eval_accuracy": 0.738654650788542, |
|
"eval_loss": 1.296163558959961, |
|
"eval_runtime": 0.8423, |
|
"eval_samples_per_second": 593.617, |
|
"eval_steps_per_second": 37.992, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"learning_rate": 1.87301871052318e-05, |
|
"loss": 1.0292, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"eval_accuracy": 0.737131757850437, |
|
"eval_loss": 1.3194587230682373, |
|
"eval_runtime": 0.8232, |
|
"eval_samples_per_second": 607.358, |
|
"eval_steps_per_second": 38.871, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 1.8433440591109523e-05, |
|
"loss": 1.0426, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"eval_accuracy": 0.7411687025420931, |
|
"eval_loss": 1.2835460901260376, |
|
"eval_runtime": 0.7859, |
|
"eval_samples_per_second": 636.221, |
|
"eval_steps_per_second": 40.718, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 1.8135502323918486e-05, |
|
"loss": 1.0196, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_accuracy": 0.747275204359673, |
|
"eval_loss": 1.234621524810791, |
|
"eval_runtime": 0.8361, |
|
"eval_samples_per_second": 597.997, |
|
"eval_steps_per_second": 38.272, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 1.7837564056727445e-05, |
|
"loss": 1.012, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"eval_accuracy": 0.7338292367399741, |
|
"eval_loss": 1.3665757179260254, |
|
"eval_runtime": 0.8157, |
|
"eval_samples_per_second": 612.938, |
|
"eval_steps_per_second": 39.228, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 1.753962578953641e-05, |
|
"loss": 1.0256, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"eval_accuracy": 0.7364842991259307, |
|
"eval_loss": 1.3140363693237305, |
|
"eval_runtime": 0.7949, |
|
"eval_samples_per_second": 628.974, |
|
"eval_steps_per_second": 40.254, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 1.724168752234537e-05, |
|
"loss": 0.9824, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"eval_accuracy": 0.7416496250852079, |
|
"eval_loss": 1.2764383554458618, |
|
"eval_runtime": 0.8178, |
|
"eval_samples_per_second": 611.417, |
|
"eval_steps_per_second": 39.131, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 1.6943749255154336e-05, |
|
"loss": 1.0048, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"eval_accuracy": 0.7487891507910881, |
|
"eval_loss": 1.2514091730117798, |
|
"eval_runtime": 0.8164, |
|
"eval_samples_per_second": 612.474, |
|
"eval_steps_per_second": 39.198, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 1.6645810987963295e-05, |
|
"loss": 0.9947, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"eval_accuracy": 0.7431572246976448, |
|
"eval_loss": 1.3350915908813477, |
|
"eval_runtime": 0.7912, |
|
"eval_samples_per_second": 631.988, |
|
"eval_steps_per_second": 40.447, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 1.634846859730664e-05, |
|
"loss": 0.977, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"eval_accuracy": 0.7451045469631596, |
|
"eval_loss": 1.2854044437408447, |
|
"eval_runtime": 0.8499, |
|
"eval_samples_per_second": 588.28, |
|
"eval_steps_per_second": 37.65, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"learning_rate": 1.60505303301156e-05, |
|
"loss": 0.9862, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"eval_accuracy": 0.7285475792988314, |
|
"eval_loss": 1.366584300994873, |
|
"eval_runtime": 0.816, |
|
"eval_samples_per_second": 612.774, |
|
"eval_steps_per_second": 39.218, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 1.5752592062924564e-05, |
|
"loss": 0.9699, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"eval_accuracy": 0.7347811780190853, |
|
"eval_loss": 1.3123427629470825, |
|
"eval_runtime": 0.7779, |
|
"eval_samples_per_second": 642.731, |
|
"eval_steps_per_second": 41.135, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 1.5454653795733526e-05, |
|
"loss": 0.977, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"eval_accuracy": 0.7254770672915969, |
|
"eval_loss": 1.3425793647766113, |
|
"eval_runtime": 0.8285, |
|
"eval_samples_per_second": 603.485, |
|
"eval_steps_per_second": 38.623, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 1.5157311405076868e-05, |
|
"loss": 0.9749, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_accuracy": 0.7296604740550929, |
|
"eval_loss": 1.3763371706008911, |
|
"eval_runtime": 0.7855, |
|
"eval_samples_per_second": 636.556, |
|
"eval_steps_per_second": 40.74, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"learning_rate": 1.4859373137885832e-05, |
|
"loss": 0.9505, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 21.09, |
|
"eval_accuracy": 0.7434469200524246, |
|
"eval_loss": 1.2372225522994995, |
|
"eval_runtime": 0.7967, |
|
"eval_samples_per_second": 627.592, |
|
"eval_steps_per_second": 40.166, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"learning_rate": 1.4561434870694793e-05, |
|
"loss": 0.9438, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"eval_accuracy": 0.7159090909090909, |
|
"eval_loss": 1.433412790298462, |
|
"eval_runtime": 0.7929, |
|
"eval_samples_per_second": 630.567, |
|
"eval_steps_per_second": 40.356, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"learning_rate": 1.4263496603503754e-05, |
|
"loss": 0.944, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"eval_accuracy": 0.7507936507936508, |
|
"eval_loss": 1.269033432006836, |
|
"eval_runtime": 0.8274, |
|
"eval_samples_per_second": 604.314, |
|
"eval_steps_per_second": 38.676, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"learning_rate": 1.3965558336312718e-05, |
|
"loss": 0.9427, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"eval_accuracy": 0.7485941118094608, |
|
"eval_loss": 1.2185914516448975, |
|
"eval_runtime": 0.7923, |
|
"eval_samples_per_second": 631.05, |
|
"eval_steps_per_second": 40.387, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 1.3667620069121679e-05, |
|
"loss": 0.9553, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"eval_accuracy": 0.726882430647292, |
|
"eval_loss": 1.3940554857254028, |
|
"eval_runtime": 0.7961, |
|
"eval_samples_per_second": 628.083, |
|
"eval_steps_per_second": 40.197, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 1.3369681801930641e-05, |
|
"loss": 0.9571, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.7273940607273941, |
|
"eval_loss": 1.4162867069244385, |
|
"eval_runtime": 0.791, |
|
"eval_samples_per_second": 632.128, |
|
"eval_steps_per_second": 40.456, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 1.3071743534739602e-05, |
|
"loss": 0.932, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"eval_accuracy": 0.7522727272727273, |
|
"eval_loss": 1.2717351913452148, |
|
"eval_runtime": 0.796, |
|
"eval_samples_per_second": 628.103, |
|
"eval_steps_per_second": 40.199, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"learning_rate": 1.2773805267548563e-05, |
|
"loss": 0.9166, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"eval_accuracy": 0.73956326268465, |
|
"eval_loss": 1.217714786529541, |
|
"eval_runtime": 0.8289, |
|
"eval_samples_per_second": 603.185, |
|
"eval_steps_per_second": 38.604, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 1.2475867000357526e-05, |
|
"loss": 0.9301, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"eval_accuracy": 0.7377950210151956, |
|
"eval_loss": 1.3264496326446533, |
|
"eval_runtime": 0.8524, |
|
"eval_samples_per_second": 586.56, |
|
"eval_steps_per_second": 37.54, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 1.2177928733166488e-05, |
|
"loss": 0.9351, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"eval_accuracy": 0.752010292698617, |
|
"eval_loss": 1.2570440769195557, |
|
"eval_runtime": 0.785, |
|
"eval_samples_per_second": 636.94, |
|
"eval_steps_per_second": 40.764, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 1.1879990465975451e-05, |
|
"loss": 0.9211, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 1.2638896703720093, |
|
"eval_runtime": 0.8753, |
|
"eval_samples_per_second": 571.265, |
|
"eval_steps_per_second": 36.561, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 23.06, |
|
"learning_rate": 1.1582052198784414e-05, |
|
"loss": 0.9211, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 23.06, |
|
"eval_accuracy": 0.7605543022881083, |
|
"eval_loss": 1.2376515865325928, |
|
"eval_runtime": 0.7946, |
|
"eval_samples_per_second": 629.265, |
|
"eval_steps_per_second": 40.273, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 1.1284113931593374e-05, |
|
"loss": 0.9196, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"eval_accuracy": 0.7485168094924193, |
|
"eval_loss": 1.2738728523254395, |
|
"eval_runtime": 0.8576, |
|
"eval_samples_per_second": 583.036, |
|
"eval_steps_per_second": 37.314, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 23.42, |
|
"learning_rate": 1.098677154093672e-05, |
|
"loss": 0.9062, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 23.42, |
|
"eval_accuracy": 0.7365366010964205, |
|
"eval_loss": 1.3262896537780762, |
|
"eval_runtime": 0.8401, |
|
"eval_samples_per_second": 595.164, |
|
"eval_steps_per_second": 38.09, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 1.068883327374568e-05, |
|
"loss": 0.8965, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_accuracy": 0.7455209024552091, |
|
"eval_loss": 1.2814128398895264, |
|
"eval_runtime": 0.778, |
|
"eval_samples_per_second": 642.691, |
|
"eval_steps_per_second": 41.132, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 23.78, |
|
"learning_rate": 1.0392086759623406e-05, |
|
"loss": 0.9004, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 23.78, |
|
"eval_accuracy": 0.7561779242174629, |
|
"eval_loss": 1.2108628749847412, |
|
"eval_runtime": 0.8669, |
|
"eval_samples_per_second": 576.736, |
|
"eval_steps_per_second": 36.911, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"learning_rate": 1.0094148492432369e-05, |
|
"loss": 0.9094, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.7528089887640449, |
|
"eval_loss": 1.2629289627075195, |
|
"eval_runtime": 0.8653, |
|
"eval_samples_per_second": 577.859, |
|
"eval_steps_per_second": 36.983, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 9.79621022524133e-06, |
|
"loss": 0.8937, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"eval_accuracy": 0.7375168690958165, |
|
"eval_loss": 1.2770532369613647, |
|
"eval_runtime": 0.8492, |
|
"eval_samples_per_second": 588.814, |
|
"eval_steps_per_second": 37.684, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 9.498271958050292e-06, |
|
"loss": 0.8711, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"eval_accuracy": 0.7353233830845771, |
|
"eval_loss": 1.3746039867401123, |
|
"eval_runtime": 0.7929, |
|
"eval_samples_per_second": 630.629, |
|
"eval_steps_per_second": 40.36, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"learning_rate": 9.200333690859255e-06, |
|
"loss": 0.8972, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"eval_accuracy": 0.7453750420450723, |
|
"eval_loss": 1.2529133558273315, |
|
"eval_runtime": 0.8497, |
|
"eval_samples_per_second": 588.462, |
|
"eval_steps_per_second": 37.662, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 8.902395423668217e-06, |
|
"loss": 0.8863, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"eval_accuracy": 0.7359154929577465, |
|
"eval_loss": 1.3219196796417236, |
|
"eval_runtime": 0.8149, |
|
"eval_samples_per_second": 613.598, |
|
"eval_steps_per_second": 39.27, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"learning_rate": 8.604457156477178e-06, |
|
"loss": 0.8823, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"eval_accuracy": 0.7367235275185066, |
|
"eval_loss": 1.313620924949646, |
|
"eval_runtime": 0.8311, |
|
"eval_samples_per_second": 601.621, |
|
"eval_steps_per_second": 38.504, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 25.03, |
|
"learning_rate": 8.306518889286139e-06, |
|
"loss": 0.8759, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 25.03, |
|
"eval_accuracy": 0.7427812811151676, |
|
"eval_loss": 1.3151708841323853, |
|
"eval_runtime": 0.7986, |
|
"eval_samples_per_second": 626.093, |
|
"eval_steps_per_second": 40.07, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 8.008580622095102e-06, |
|
"loss": 0.8722, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"eval_accuracy": 0.7569644572526417, |
|
"eval_loss": 1.3108021020889282, |
|
"eval_runtime": 0.8281, |
|
"eval_samples_per_second": 603.782, |
|
"eval_steps_per_second": 38.642, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 25.38, |
|
"learning_rate": 7.710642354904064e-06, |
|
"loss": 0.8548, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 25.38, |
|
"eval_accuracy": 0.7367716008037508, |
|
"eval_loss": 1.3503183126449585, |
|
"eval_runtime": 0.7871, |
|
"eval_samples_per_second": 635.233, |
|
"eval_steps_per_second": 40.655, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"learning_rate": 7.412704087713027e-06, |
|
"loss": 0.8728, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"eval_accuracy": 0.7402768622280818, |
|
"eval_loss": 1.3091211318969727, |
|
"eval_runtime": 0.8581, |
|
"eval_samples_per_second": 582.712, |
|
"eval_steps_per_second": 37.294, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 25.74, |
|
"learning_rate": 7.114765820521989e-06, |
|
"loss": 0.8633, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 25.74, |
|
"eval_accuracy": 0.7416481069042317, |
|
"eval_loss": 1.2952070236206055, |
|
"eval_runtime": 0.8515, |
|
"eval_samples_per_second": 587.213, |
|
"eval_steps_per_second": 37.582, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 6.816827553330949e-06, |
|
"loss": 0.8612, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"eval_accuracy": 0.7719072164948454, |
|
"eval_loss": 1.1612097024917603, |
|
"eval_runtime": 0.7967, |
|
"eval_samples_per_second": 627.618, |
|
"eval_steps_per_second": 40.168, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"learning_rate": 6.5194851626742935e-06, |
|
"loss": 0.8677, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"eval_accuracy": 0.7449731903485255, |
|
"eval_loss": 1.2855061292648315, |
|
"eval_runtime": 0.8112, |
|
"eval_samples_per_second": 616.391, |
|
"eval_steps_per_second": 39.449, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"learning_rate": 6.2221427720176384e-06, |
|
"loss": 0.8526, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"eval_accuracy": 0.7544929396662388, |
|
"eval_loss": 1.297914981842041, |
|
"eval_runtime": 0.8472, |
|
"eval_samples_per_second": 590.203, |
|
"eval_steps_per_second": 37.773, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 26.46, |
|
"learning_rate": 5.9242045048266e-06, |
|
"loss": 0.8594, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 26.46, |
|
"eval_accuracy": 0.7598070739549839, |
|
"eval_loss": 1.2569819688796997, |
|
"eval_runtime": 0.7923, |
|
"eval_samples_per_second": 631.066, |
|
"eval_steps_per_second": 40.388, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 5.626266237635562e-06, |
|
"loss": 0.8481, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"eval_accuracy": 0.7491992312620115, |
|
"eval_loss": 1.2336714267730713, |
|
"eval_runtime": 0.8668, |
|
"eval_samples_per_second": 576.839, |
|
"eval_steps_per_second": 36.918, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 26.81, |
|
"learning_rate": 5.3283279704445245e-06, |
|
"loss": 0.855, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 26.81, |
|
"eval_accuracy": 0.7443507588532884, |
|
"eval_loss": 1.2874828577041626, |
|
"eval_runtime": 0.7926, |
|
"eval_samples_per_second": 630.803, |
|
"eval_steps_per_second": 40.371, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 5.030389703253486e-06, |
|
"loss": 0.835, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.7584731819677526, |
|
"eval_loss": 1.2270281314849854, |
|
"eval_runtime": 0.8172, |
|
"eval_samples_per_second": 611.826, |
|
"eval_steps_per_second": 39.157, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 27.17, |
|
"learning_rate": 4.732451436062448e-06, |
|
"loss": 0.8309, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 27.17, |
|
"eval_accuracy": 0.7389322916666666, |
|
"eval_loss": 1.2539992332458496, |
|
"eval_runtime": 0.8357, |
|
"eval_samples_per_second": 598.292, |
|
"eval_steps_per_second": 38.291, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"learning_rate": 4.43451316887141e-06, |
|
"loss": 0.8326, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"eval_accuracy": 0.7374631268436578, |
|
"eval_loss": 1.3610546588897705, |
|
"eval_runtime": 0.7953, |
|
"eval_samples_per_second": 628.676, |
|
"eval_steps_per_second": 40.235, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 27.53, |
|
"learning_rate": 4.136574901680372e-06, |
|
"loss": 0.8398, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 27.53, |
|
"eval_accuracy": 0.7504918032786885, |
|
"eval_loss": 1.2247506380081177, |
|
"eval_runtime": 0.859, |
|
"eval_samples_per_second": 582.099, |
|
"eval_steps_per_second": 37.254, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"learning_rate": 3.838636634489334e-06, |
|
"loss": 0.8304, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"eval_accuracy": 0.7607282184655396, |
|
"eval_loss": 1.2403171062469482, |
|
"eval_runtime": 0.9471, |
|
"eval_samples_per_second": 527.922, |
|
"eval_steps_per_second": 33.787, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 3.5406983672982957e-06, |
|
"loss": 0.8373, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"eval_accuracy": 0.7611295681063123, |
|
"eval_loss": 1.1708660125732422, |
|
"eval_runtime": 0.8284, |
|
"eval_samples_per_second": 603.609, |
|
"eval_steps_per_second": 38.631, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 28.07, |
|
"learning_rate": 3.2427601001072583e-06, |
|
"loss": 0.8462, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 28.07, |
|
"eval_accuracy": 0.7508185985592666, |
|
"eval_loss": 1.289104700088501, |
|
"eval_runtime": 0.8603, |
|
"eval_samples_per_second": 581.16, |
|
"eval_steps_per_second": 37.194, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 2.945417709450602e-06, |
|
"loss": 0.8259, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"eval_accuracy": 0.7500814597588791, |
|
"eval_loss": 1.2452012300491333, |
|
"eval_runtime": 0.8046, |
|
"eval_samples_per_second": 621.394, |
|
"eval_steps_per_second": 39.769, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"learning_rate": 2.647479442259564e-06, |
|
"loss": 0.8334, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"eval_accuracy": 0.746810598626104, |
|
"eval_loss": 1.2985996007919312, |
|
"eval_runtime": 0.9197, |
|
"eval_samples_per_second": 543.676, |
|
"eval_steps_per_second": 34.795, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"learning_rate": 2.349541175068526e-06, |
|
"loss": 0.8115, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"eval_accuracy": 0.7514638906961614, |
|
"eval_loss": 1.2879589796066284, |
|
"eval_runtime": 0.7986, |
|
"eval_samples_per_second": 626.129, |
|
"eval_steps_per_second": 40.072, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 28.78, |
|
"learning_rate": 2.0516029078774876e-06, |
|
"loss": 0.8205, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 28.78, |
|
"eval_accuracy": 0.75615359369872, |
|
"eval_loss": 1.2727956771850586, |
|
"eval_runtime": 0.8652, |
|
"eval_samples_per_second": 577.899, |
|
"eval_steps_per_second": 36.986, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 1.7536646406864498e-06, |
|
"loss": 0.8261, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"eval_accuracy": 0.7523561910952227, |
|
"eval_loss": 1.2660555839538574, |
|
"eval_runtime": 0.7893, |
|
"eval_samples_per_second": 633.494, |
|
"eval_steps_per_second": 40.544, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"learning_rate": 1.4563222500297937e-06, |
|
"loss": 0.8299, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"eval_accuracy": 0.7486106570774763, |
|
"eval_loss": 1.25924813747406, |
|
"eval_runtime": 0.8513, |
|
"eval_samples_per_second": 587.342, |
|
"eval_steps_per_second": 37.59, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 29.32, |
|
"learning_rate": 1.1583839828387559e-06, |
|
"loss": 0.8276, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 29.32, |
|
"eval_accuracy": 0.7529644268774703, |
|
"eval_loss": 1.2325080633163452, |
|
"eval_runtime": 0.8587, |
|
"eval_samples_per_second": 582.291, |
|
"eval_steps_per_second": 37.267, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"learning_rate": 8.604457156477178e-07, |
|
"loss": 0.8112, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 29.5, |
|
"eval_accuracy": 0.7477890599410416, |
|
"eval_loss": 1.3154096603393555, |
|
"eval_runtime": 0.8166, |
|
"eval_samples_per_second": 612.267, |
|
"eval_steps_per_second": 39.185, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 5.625074484566799e-07, |
|
"loss": 0.8111, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"eval_accuracy": 0.740531561461794, |
|
"eval_loss": 1.3342524766921997, |
|
"eval_runtime": 0.8076, |
|
"eval_samples_per_second": 619.083, |
|
"eval_steps_per_second": 39.621, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 2.645691812656418e-07, |
|
"loss": 0.8148, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy": 0.7484622855292975, |
|
"eval_loss": 1.2806158065795898, |
|
"eval_runtime": 0.8122, |
|
"eval_samples_per_second": 615.596, |
|
"eval_steps_per_second": 39.398, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 83910, |
|
"total_flos": 3.583580261367381e+17, |
|
"train_loss": 1.1746184680817338, |
|
"train_runtime": 16410.0948, |
|
"train_samples_per_second": 163.619, |
|
"train_steps_per_second": 5.113 |
|
} |
|
], |
|
"max_steps": 83910, |
|
"num_train_epochs": 30, |
|
"total_flos": 3.583580261367381e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|