{ "best_metric": 0.7719072164948454, "best_model_checkpoint": "roberta-large-movies/checkpoint-72500", "epoch": 30.0, "global_step": 83910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 4.970504111548088e-05, "loss": 1.7698, "step": 500 }, { "epoch": 0.18, "eval_accuracy": 0.6738421395955643, "eval_loss": 1.6167851686477661, "eval_runtime": 0.8246, "eval_samples_per_second": 606.37, "eval_steps_per_second": 38.808, "step": 500 }, { "epoch": 0.36, "learning_rate": 4.94082946013586e-05, "loss": 1.7761, "step": 1000 }, { "epoch": 0.36, "eval_accuracy": 0.6829508196721311, "eval_loss": 1.6522468328475952, "eval_runtime": 0.7873, "eval_samples_per_second": 635.049, "eval_steps_per_second": 40.643, "step": 1000 }, { "epoch": 0.54, "learning_rate": 4.9110356334167565e-05, "loss": 1.7626, "step": 1500 }, { "epoch": 0.54, "eval_accuracy": 0.6660117878192534, "eval_loss": 1.6534239053726196, "eval_runtime": 0.7869, "eval_samples_per_second": 635.425, "eval_steps_per_second": 40.667, "step": 1500 }, { "epoch": 0.72, "learning_rate": 4.8812418066976524e-05, "loss": 1.7602, "step": 2000 }, { "epoch": 0.72, "eval_accuracy": 0.6787299419597133, "eval_loss": 1.6575504541397095, "eval_runtime": 0.7882, "eval_samples_per_second": 634.385, "eval_steps_per_second": 40.601, "step": 2000 }, { "epoch": 0.89, "learning_rate": 4.851447979978549e-05, "loss": 1.7587, "step": 2500 }, { "epoch": 0.89, "eval_accuracy": 0.6772697150430749, "eval_loss": 1.6266298294067383, "eval_runtime": 0.7893, "eval_samples_per_second": 633.509, "eval_steps_per_second": 40.545, "step": 2500 }, { "epoch": 1.07, "learning_rate": 4.821654153259445e-05, "loss": 1.7047, "step": 3000 }, { "epoch": 1.07, "eval_accuracy": 0.6851971557853911, "eval_loss": 1.605985164642334, "eval_runtime": 0.8181, "eval_samples_per_second": 611.179, "eval_steps_per_second": 39.115, "step": 3000 }, { "epoch": 1.25, "learning_rate": 4.791860326540341e-05, "loss": 1.6782, "step": 3500 }, { "epoch": 1.25, "eval_accuracy": 0.6906354515050167, "eval_loss": 1.599035382270813, "eval_runtime": 0.8184, "eval_samples_per_second": 610.967, "eval_steps_per_second": 39.102, "step": 3500 }, { "epoch": 1.43, "learning_rate": 4.7620664998212375e-05, "loss": 1.6733, "step": 4000 }, { "epoch": 1.43, "eval_accuracy": 0.6967426710097719, "eval_loss": 1.5377483367919922, "eval_runtime": 0.819, "eval_samples_per_second": 610.521, "eval_steps_per_second": 39.073, "step": 4000 }, { "epoch": 1.61, "learning_rate": 4.7322726731021334e-05, "loss": 1.6664, "step": 4500 }, { "epoch": 1.61, "eval_accuracy": 0.6746607762701168, "eval_loss": 1.6434643268585205, "eval_runtime": 0.7966, "eval_samples_per_second": 627.631, "eval_steps_per_second": 40.168, "step": 4500 }, { "epoch": 1.79, "learning_rate": 4.70247884638303e-05, "loss": 1.6719, "step": 5000 }, { "epoch": 1.79, "eval_accuracy": 0.6907181571815718, "eval_loss": 1.483905553817749, "eval_runtime": 0.7989, "eval_samples_per_second": 625.841, "eval_steps_per_second": 40.054, "step": 5000 }, { "epoch": 1.97, "learning_rate": 4.672685019663926e-05, "loss": 1.6502, "step": 5500 }, { "epoch": 1.97, "eval_accuracy": 0.6896661367249602, "eval_loss": 1.535127878189087, "eval_runtime": 0.823, "eval_samples_per_second": 607.558, "eval_steps_per_second": 38.884, "step": 5500 }, { "epoch": 2.15, "learning_rate": 4.642891192944822e-05, "loss": 1.6233, "step": 6000 }, { "epoch": 2.15, "eval_accuracy": 0.6763219939373526, "eval_loss": 1.6817570924758911, "eval_runtime": 0.7881, "eval_samples_per_second": 634.403, "eval_steps_per_second": 40.602, "step": 6000 }, { "epoch": 2.32, "learning_rate": 4.6130973662257184e-05, "loss": 1.6127, "step": 6500 }, { "epoch": 2.32, "eval_accuracy": 0.685335059889932, "eval_loss": 1.5865211486816406, "eval_runtime": 0.787, "eval_samples_per_second": 635.291, "eval_steps_per_second": 40.659, "step": 6500 }, { "epoch": 2.5, "learning_rate": 4.5833035395066143e-05, "loss": 1.6274, "step": 7000 }, { "epoch": 2.5, "eval_accuracy": 0.7003633961017509, "eval_loss": 1.5004233121871948, "eval_runtime": 0.8009, "eval_samples_per_second": 624.318, "eval_steps_per_second": 39.956, "step": 7000 }, { "epoch": 2.68, "learning_rate": 4.553628888094387e-05, "loss": 1.601, "step": 7500 }, { "epoch": 2.68, "eval_accuracy": 0.6929970129439097, "eval_loss": 1.452188491821289, "eval_runtime": 0.7898, "eval_samples_per_second": 633.056, "eval_steps_per_second": 40.516, "step": 7500 }, { "epoch": 2.86, "learning_rate": 4.523835061375284e-05, "loss": 1.6123, "step": 8000 }, { "epoch": 2.86, "eval_accuracy": 0.689419795221843, "eval_loss": 1.5370689630508423, "eval_runtime": 0.8546, "eval_samples_per_second": 585.05, "eval_steps_per_second": 37.443, "step": 8000 }, { "epoch": 3.04, "learning_rate": 4.4940412346561796e-05, "loss": 1.6074, "step": 8500 }, { "epoch": 3.04, "eval_accuracy": 0.6952157912345266, "eval_loss": 1.5342369079589844, "eval_runtime": 0.8214, "eval_samples_per_second": 608.68, "eval_steps_per_second": 38.956, "step": 8500 }, { "epoch": 3.22, "learning_rate": 4.4642474079370755e-05, "loss": 1.563, "step": 9000 }, { "epoch": 3.22, "eval_accuracy": 0.6875834445927904, "eval_loss": 1.568178415298462, "eval_runtime": 0.8488, "eval_samples_per_second": 589.06, "eval_steps_per_second": 37.7, "step": 9000 }, { "epoch": 3.4, "learning_rate": 4.4344535812179714e-05, "loss": 1.5746, "step": 9500 }, { "epoch": 3.4, "eval_accuracy": 0.6957663275352806, "eval_loss": 1.5704632997512817, "eval_runtime": 0.852, "eval_samples_per_second": 586.84, "eval_steps_per_second": 37.558, "step": 9500 }, { "epoch": 3.58, "learning_rate": 4.404778929805745e-05, "loss": 1.5539, "step": 10000 }, { "epoch": 3.58, "eval_accuracy": 0.7040711597673623, "eval_loss": 1.4710707664489746, "eval_runtime": 0.85, "eval_samples_per_second": 588.248, "eval_steps_per_second": 37.648, "step": 10000 }, { "epoch": 3.75, "learning_rate": 4.374985103086641e-05, "loss": 1.578, "step": 10500 }, { "epoch": 3.75, "eval_accuracy": 0.6888888888888889, "eval_loss": 1.5465725660324097, "eval_runtime": 0.8902, "eval_samples_per_second": 561.645, "eval_steps_per_second": 35.945, "step": 10500 }, { "epoch": 3.93, "learning_rate": 4.345191276367537e-05, "loss": 1.5492, "step": 11000 }, { "epoch": 3.93, "eval_accuracy": 0.6968894771674388, "eval_loss": 1.4628891944885254, "eval_runtime": 0.8368, "eval_samples_per_second": 597.487, "eval_steps_per_second": 38.239, "step": 11000 }, { "epoch": 4.11, "learning_rate": 4.3153974496484326e-05, "loss": 1.5291, "step": 11500 }, { "epoch": 4.11, "eval_accuracy": 0.7200132538104705, "eval_loss": 1.4264894723892212, "eval_runtime": 0.8798, "eval_samples_per_second": 568.319, "eval_steps_per_second": 36.372, "step": 11500 }, { "epoch": 4.29, "learning_rate": 4.285603622929329e-05, "loss": 1.5079, "step": 12000 }, { "epoch": 4.29, "eval_accuracy": 0.6966074313408723, "eval_loss": 1.5052707195281982, "eval_runtime": 0.8186, "eval_samples_per_second": 610.796, "eval_steps_per_second": 39.091, "step": 12000 }, { "epoch": 4.47, "learning_rate": 4.255809796210226e-05, "loss": 1.5283, "step": 12500 }, { "epoch": 4.47, "eval_accuracy": 0.6902654867256637, "eval_loss": 1.5257039070129395, "eval_runtime": 0.8002, "eval_samples_per_second": 624.861, "eval_steps_per_second": 39.991, "step": 12500 }, { "epoch": 4.65, "learning_rate": 4.226015969491122e-05, "loss": 1.5141, "step": 13000 }, { "epoch": 4.65, "eval_accuracy": 0.6949898442789438, "eval_loss": 1.5063292980194092, "eval_runtime": 0.8654, "eval_samples_per_second": 577.759, "eval_steps_per_second": 36.977, "step": 13000 }, { "epoch": 4.83, "learning_rate": 4.1962221427720176e-05, "loss": 1.4979, "step": 13500 }, { "epoch": 4.83, "eval_accuracy": 0.6955945677376615, "eval_loss": 1.5636450052261353, "eval_runtime": 0.8149, "eval_samples_per_second": 613.582, "eval_steps_per_second": 39.269, "step": 13500 }, { "epoch": 5.01, "learning_rate": 4.1664283160529136e-05, "loss": 1.5294, "step": 14000 }, { "epoch": 5.01, "eval_accuracy": 0.6835193696651346, "eval_loss": 1.587847113609314, "eval_runtime": 0.8296, "eval_samples_per_second": 602.733, "eval_steps_per_second": 38.575, "step": 14000 }, { "epoch": 5.18, "learning_rate": 4.13663448933381e-05, "loss": 1.4641, "step": 14500 }, { "epoch": 5.18, "eval_accuracy": 0.6962067807989258, "eval_loss": 1.5574804544448853, "eval_runtime": 0.81, "eval_samples_per_second": 617.287, "eval_steps_per_second": 39.506, "step": 14500 }, { "epoch": 5.36, "learning_rate": 4.106840662614707e-05, "loss": 1.4754, "step": 15000 }, { "epoch": 5.36, "eval_accuracy": 0.7006847081838931, "eval_loss": 1.4779187440872192, "eval_runtime": 0.8312, "eval_samples_per_second": 601.557, "eval_steps_per_second": 38.5, "step": 15000 }, { "epoch": 5.54, "learning_rate": 4.077046835895603e-05, "loss": 1.4696, "step": 15500 }, { "epoch": 5.54, "eval_accuracy": 0.6965271015903928, "eval_loss": 1.451996922492981, "eval_runtime": 0.7909, "eval_samples_per_second": 632.19, "eval_steps_per_second": 40.46, "step": 15500 }, { "epoch": 5.72, "learning_rate": 4.0472530091764986e-05, "loss": 1.4655, "step": 16000 }, { "epoch": 5.72, "eval_accuracy": 0.683049147442327, "eval_loss": 1.6320295333862305, "eval_runtime": 0.8309, "eval_samples_per_second": 601.76, "eval_steps_per_second": 38.513, "step": 16000 }, { "epoch": 5.9, "learning_rate": 4.0174591824573945e-05, "loss": 1.4792, "step": 16500 }, { "epoch": 5.9, "eval_accuracy": 0.7134165866154338, "eval_loss": 1.415226697921753, "eval_runtime": 0.8575, "eval_samples_per_second": 583.097, "eval_steps_per_second": 37.318, "step": 16500 }, { "epoch": 6.08, "learning_rate": 3.98772494339173e-05, "loss": 1.4379, "step": 17000 }, { "epoch": 6.08, "eval_accuracy": 0.7041935483870968, "eval_loss": 1.4900156259536743, "eval_runtime": 0.8413, "eval_samples_per_second": 594.352, "eval_steps_per_second": 38.039, "step": 17000 }, { "epoch": 6.26, "learning_rate": 3.957931116672626e-05, "loss": 1.4281, "step": 17500 }, { "epoch": 6.26, "eval_accuracy": 0.6989864864864865, "eval_loss": 1.5407416820526123, "eval_runtime": 0.8677, "eval_samples_per_second": 576.232, "eval_steps_per_second": 36.879, "step": 17500 }, { "epoch": 6.44, "learning_rate": 3.928137289953522e-05, "loss": 1.436, "step": 18000 }, { "epoch": 6.44, "eval_accuracy": 0.6914175506268081, "eval_loss": 1.534258246421814, "eval_runtime": 0.843, "eval_samples_per_second": 593.143, "eval_steps_per_second": 37.961, "step": 18000 }, { "epoch": 6.61, "learning_rate": 3.8983434632344176e-05, "loss": 1.4342, "step": 18500 }, { "epoch": 6.61, "eval_accuracy": 0.7023696682464455, "eval_loss": 1.5323561429977417, "eval_runtime": 0.7874, "eval_samples_per_second": 635.024, "eval_steps_per_second": 40.642, "step": 18500 }, { "epoch": 6.79, "learning_rate": 3.868549636515314e-05, "loss": 1.4176, "step": 19000 }, { "epoch": 6.79, "eval_accuracy": 0.7132913490222075, "eval_loss": 1.4485751390457153, "eval_runtime": 0.8567, "eval_samples_per_second": 583.665, "eval_steps_per_second": 37.355, "step": 19000 }, { "epoch": 6.97, "learning_rate": 3.838755809796211e-05, "loss": 1.4308, "step": 19500 }, { "epoch": 6.97, "eval_accuracy": 0.7031503734978889, "eval_loss": 1.4598056077957153, "eval_runtime": 0.79, "eval_samples_per_second": 632.872, "eval_steps_per_second": 40.504, "step": 19500 }, { "epoch": 7.15, "learning_rate": 3.809021570730545e-05, "loss": 1.4014, "step": 20000 }, { "epoch": 7.15, "eval_accuracy": 0.6938435940099834, "eval_loss": 1.575023889541626, "eval_runtime": 0.8292, "eval_samples_per_second": 603.024, "eval_steps_per_second": 38.594, "step": 20000 }, { "epoch": 7.33, "learning_rate": 3.779227744011441e-05, "loss": 1.3661, "step": 20500 }, { "epoch": 7.33, "eval_accuracy": 0.6985221674876847, "eval_loss": 1.5403505563735962, "eval_runtime": 0.8319, "eval_samples_per_second": 601.063, "eval_steps_per_second": 38.468, "step": 20500 }, { "epoch": 7.51, "learning_rate": 3.7494935049457754e-05, "loss": 1.3857, "step": 21000 }, { "epoch": 7.51, "eval_accuracy": 0.7037155669442665, "eval_loss": 1.4692307710647583, "eval_runtime": 0.8177, "eval_samples_per_second": 611.5, "eval_steps_per_second": 39.136, "step": 21000 }, { "epoch": 7.69, "learning_rate": 3.719699678226672e-05, "loss": 1.3846, "step": 21500 }, { "epoch": 7.69, "eval_accuracy": 0.6941445861956166, "eval_loss": 1.5511342287063599, "eval_runtime": 0.7898, "eval_samples_per_second": 633.076, "eval_steps_per_second": 40.517, "step": 21500 }, { "epoch": 7.87, "learning_rate": 3.689905851507568e-05, "loss": 1.3867, "step": 22000 }, { "epoch": 7.87, "eval_accuracy": 0.6925124792013311, "eval_loss": 1.5321439504623413, "eval_runtime": 0.8379, "eval_samples_per_second": 596.713, "eval_steps_per_second": 38.19, "step": 22000 }, { "epoch": 8.04, "learning_rate": 3.660112024788464e-05, "loss": 1.3658, "step": 22500 }, { "epoch": 8.04, "eval_accuracy": 0.7020917678812416, "eval_loss": 1.5499885082244873, "eval_runtime": 0.8209, "eval_samples_per_second": 609.075, "eval_steps_per_second": 38.981, "step": 22500 }, { "epoch": 8.22, "learning_rate": 3.6303181980693604e-05, "loss": 1.3406, "step": 23000 }, { "epoch": 8.22, "eval_accuracy": 0.6959503592423253, "eval_loss": 1.523918628692627, "eval_runtime": 0.8298, "eval_samples_per_second": 602.525, "eval_steps_per_second": 38.562, "step": 23000 }, { "epoch": 8.4, "learning_rate": 3.600524371350256e-05, "loss": 1.3405, "step": 23500 }, { "epoch": 8.4, "eval_accuracy": 0.7055256064690026, "eval_loss": 1.4414023160934448, "eval_runtime": 0.8516, "eval_samples_per_second": 587.105, "eval_steps_per_second": 37.575, "step": 23500 }, { "epoch": 8.58, "learning_rate": 3.570730544631153e-05, "loss": 1.3373, "step": 24000 }, { "epoch": 8.58, "eval_accuracy": 0.6784238957737527, "eval_loss": 1.599377155303955, "eval_runtime": 0.791, "eval_samples_per_second": 632.109, "eval_steps_per_second": 40.455, "step": 24000 }, { "epoch": 8.76, "learning_rate": 3.540936717912049e-05, "loss": 1.3527, "step": 24500 }, { "epoch": 8.76, "eval_accuracy": 0.6970387243735763, "eval_loss": 1.5105814933776855, "eval_runtime": 0.8594, "eval_samples_per_second": 581.797, "eval_steps_per_second": 37.235, "step": 24500 }, { "epoch": 8.94, "learning_rate": 3.511142891192945e-05, "loss": 1.3436, "step": 25000 }, { "epoch": 8.94, "eval_accuracy": 0.7079758500158881, "eval_loss": 1.471426010131836, "eval_runtime": 0.8427, "eval_samples_per_second": 593.355, "eval_steps_per_second": 37.975, "step": 25000 }, { "epoch": 9.12, "learning_rate": 3.4813490644738414e-05, "loss": 1.3069, "step": 25500 }, { "epoch": 9.12, "eval_accuracy": 0.6953099376844867, "eval_loss": 1.4990392923355103, "eval_runtime": 0.8575, "eval_samples_per_second": 583.12, "eval_steps_per_second": 37.32, "step": 25500 }, { "epoch": 9.3, "learning_rate": 3.451555237754737e-05, "loss": 1.2969, "step": 26000 }, { "epoch": 9.3, "eval_accuracy": 0.6964285714285714, "eval_loss": 1.4809668064117432, "eval_runtime": 0.8312, "eval_samples_per_second": 601.512, "eval_steps_per_second": 38.497, "step": 26000 }, { "epoch": 9.47, "learning_rate": 3.421761411035634e-05, "loss": 1.3009, "step": 26500 }, { "epoch": 9.47, "eval_accuracy": 0.6875602700096431, "eval_loss": 1.5964903831481934, "eval_runtime": 0.8752, "eval_samples_per_second": 571.296, "eval_steps_per_second": 36.563, "step": 26500 }, { "epoch": 9.65, "learning_rate": 3.392086759623406e-05, "loss": 1.3227, "step": 27000 }, { "epoch": 9.65, "eval_accuracy": 0.7013662979830839, "eval_loss": 1.429559588432312, "eval_runtime": 0.7904, "eval_samples_per_second": 632.561, "eval_steps_per_second": 40.484, "step": 27000 }, { "epoch": 9.83, "learning_rate": 3.3622929329043025e-05, "loss": 1.3259, "step": 27500 }, { "epoch": 9.83, "eval_accuracy": 0.7189224277831873, "eval_loss": 1.413652777671814, "eval_runtime": 0.8134, "eval_samples_per_second": 614.697, "eval_steps_per_second": 39.341, "step": 27500 }, { "epoch": 10.01, "learning_rate": 3.3324991061851985e-05, "loss": 1.3131, "step": 28000 }, { "epoch": 10.01, "eval_accuracy": 0.7019570099454604, "eval_loss": 1.534200668334961, "eval_runtime": 0.8056, "eval_samples_per_second": 620.653, "eval_steps_per_second": 39.722, "step": 28000 }, { "epoch": 10.19, "learning_rate": 3.3027052794660944e-05, "loss": 1.271, "step": 28500 }, { "epoch": 10.19, "eval_accuracy": 0.711340206185567, "eval_loss": 1.470828890800476, "eval_runtime": 0.7815, "eval_samples_per_second": 639.779, "eval_steps_per_second": 40.946, "step": 28500 }, { "epoch": 10.37, "learning_rate": 3.272911452746991e-05, "loss": 1.2684, "step": 29000 }, { "epoch": 10.37, "eval_accuracy": 0.7045747422680413, "eval_loss": 1.4341672658920288, "eval_runtime": 0.7954, "eval_samples_per_second": 628.629, "eval_steps_per_second": 40.232, "step": 29000 }, { "epoch": 10.55, "learning_rate": 3.2431176260278876e-05, "loss": 1.2767, "step": 29500 }, { "epoch": 10.55, "eval_accuracy": 0.709353000335233, "eval_loss": 1.4703407287597656, "eval_runtime": 0.8179, "eval_samples_per_second": 611.351, "eval_steps_per_second": 39.126, "step": 29500 }, { "epoch": 10.73, "learning_rate": 3.2133237993087835e-05, "loss": 1.2861, "step": 30000 }, { "epoch": 10.73, "eval_accuracy": 0.7308937823834197, "eval_loss": 1.3323109149932861, "eval_runtime": 0.7855, "eval_samples_per_second": 636.523, "eval_steps_per_second": 40.737, "step": 30000 }, { "epoch": 10.9, "learning_rate": 3.1835299725896794e-05, "loss": 1.2617, "step": 30500 }, { "epoch": 10.9, "eval_accuracy": 0.7003344481605351, "eval_loss": 1.4562044143676758, "eval_runtime": 0.7951, "eval_samples_per_second": 628.826, "eval_steps_per_second": 40.245, "step": 30500 }, { "epoch": 11.08, "learning_rate": 3.153736145870575e-05, "loss": 1.2551, "step": 31000 }, { "epoch": 11.08, "eval_accuracy": 0.7169689119170984, "eval_loss": 1.4361472129821777, "eval_runtime": 0.8647, "eval_samples_per_second": 578.22, "eval_steps_per_second": 37.006, "step": 31000 }, { "epoch": 11.26, "learning_rate": 3.124001906804911e-05, "loss": 1.2404, "step": 31500 }, { "epoch": 11.26, "eval_accuracy": 0.7034617896799478, "eval_loss": 1.4536628723144531, "eval_runtime": 0.7907, "eval_samples_per_second": 632.325, "eval_steps_per_second": 40.469, "step": 31500 }, { "epoch": 11.44, "learning_rate": 3.0942080800858066e-05, "loss": 1.2562, "step": 32000 }, { "epoch": 11.44, "eval_accuracy": 0.7132209980557356, "eval_loss": 1.4038574695587158, "eval_runtime": 0.7924, "eval_samples_per_second": 631.001, "eval_steps_per_second": 40.384, "step": 32000 }, { "epoch": 11.62, "learning_rate": 3.0644142533667025e-05, "loss": 1.2489, "step": 32500 }, { "epoch": 11.62, "eval_accuracy": 0.706418918918919, "eval_loss": 1.4372212886810303, "eval_runtime": 0.8024, "eval_samples_per_second": 623.122, "eval_steps_per_second": 39.88, "step": 32500 }, { "epoch": 11.8, "learning_rate": 3.0346204266475984e-05, "loss": 1.2406, "step": 33000 }, { "epoch": 11.8, "eval_accuracy": 0.7087442472057857, "eval_loss": 1.4926137924194336, "eval_runtime": 0.8525, "eval_samples_per_second": 586.532, "eval_steps_per_second": 37.538, "step": 33000 }, { "epoch": 11.98, "learning_rate": 3.0048265999284947e-05, "loss": 1.2285, "step": 33500 }, { "epoch": 11.98, "eval_accuracy": 0.7152005392652511, "eval_loss": 1.4080321788787842, "eval_runtime": 0.8108, "eval_samples_per_second": 616.703, "eval_steps_per_second": 39.469, "step": 33500 }, { "epoch": 12.16, "learning_rate": 2.9750327732093913e-05, "loss": 1.2213, "step": 34000 }, { "epoch": 12.16, "eval_accuracy": 0.7170240415854451, "eval_loss": 1.403072476387024, "eval_runtime": 0.8459, "eval_samples_per_second": 591.089, "eval_steps_per_second": 37.83, "step": 34000 }, { "epoch": 12.33, "learning_rate": 2.9452389464902875e-05, "loss": 1.1998, "step": 34500 }, { "epoch": 12.33, "eval_accuracy": 0.7222584856396866, "eval_loss": 1.3541438579559326, "eval_runtime": 0.7909, "eval_samples_per_second": 632.16, "eval_steps_per_second": 40.458, "step": 34500 }, { "epoch": 12.51, "learning_rate": 2.9154451197711835e-05, "loss": 1.2184, "step": 35000 }, { "epoch": 12.51, "eval_accuracy": 0.7308441558441559, "eval_loss": 1.3629957437515259, "eval_runtime": 0.8716, "eval_samples_per_second": 573.677, "eval_steps_per_second": 36.715, "step": 35000 }, { "epoch": 12.69, "learning_rate": 2.8856512930520797e-05, "loss": 1.2195, "step": 35500 }, { "epoch": 12.69, "eval_accuracy": 0.7281362594169669, "eval_loss": 1.312456488609314, "eval_runtime": 0.852, "eval_samples_per_second": 586.847, "eval_steps_per_second": 37.558, "step": 35500 }, { "epoch": 12.87, "learning_rate": 2.8558574663329756e-05, "loss": 1.2178, "step": 36000 }, { "epoch": 12.87, "eval_accuracy": 0.7119236883942767, "eval_loss": 1.4257023334503174, "eval_runtime": 0.8597, "eval_samples_per_second": 581.571, "eval_steps_per_second": 37.221, "step": 36000 }, { "epoch": 13.05, "learning_rate": 2.8260636396138722e-05, "loss": 1.1918, "step": 36500 }, { "epoch": 13.05, "eval_accuracy": 0.7152686762778506, "eval_loss": 1.4108035564422607, "eval_runtime": 0.9192, "eval_samples_per_second": 543.96, "eval_steps_per_second": 34.813, "step": 36500 }, { "epoch": 13.23, "learning_rate": 2.7963294005482066e-05, "loss": 1.1664, "step": 37000 }, { "epoch": 13.23, "eval_accuracy": 0.7226588081204977, "eval_loss": 1.3577048778533936, "eval_runtime": 0.7887, "eval_samples_per_second": 633.948, "eval_steps_per_second": 40.573, "step": 37000 }, { "epoch": 13.41, "learning_rate": 2.7665355738291028e-05, "loss": 1.1754, "step": 37500 }, { "epoch": 13.41, "eval_accuracy": 0.720593191776205, "eval_loss": 1.377700924873352, "eval_runtime": 0.8445, "eval_samples_per_second": 592.06, "eval_steps_per_second": 37.892, "step": 37500 }, { "epoch": 13.59, "learning_rate": 2.7367417471099987e-05, "loss": 1.1855, "step": 38000 }, { "epoch": 13.59, "eval_accuracy": 0.7354008578027054, "eval_loss": 1.350059151649475, "eval_runtime": 0.8109, "eval_samples_per_second": 616.607, "eval_steps_per_second": 39.463, "step": 38000 }, { "epoch": 13.76, "learning_rate": 2.7070075080443334e-05, "loss": 1.1644, "step": 38500 }, { "epoch": 13.76, "eval_accuracy": 0.7206685953069752, "eval_loss": 1.374656081199646, "eval_runtime": 0.8397, "eval_samples_per_second": 595.482, "eval_steps_per_second": 38.111, "step": 38500 }, { "epoch": 13.94, "learning_rate": 2.6772136813252297e-05, "loss": 1.1709, "step": 39000 }, { "epoch": 13.94, "eval_accuracy": 0.7183739837398374, "eval_loss": 1.3703839778900146, "eval_runtime": 0.8025, "eval_samples_per_second": 623.038, "eval_steps_per_second": 39.874, "step": 39000 }, { "epoch": 14.12, "learning_rate": 2.6474198546061256e-05, "loss": 1.1613, "step": 39500 }, { "epoch": 14.12, "eval_accuracy": 0.7246875, "eval_loss": 1.4306718111038208, "eval_runtime": 0.8499, "eval_samples_per_second": 588.275, "eval_steps_per_second": 37.65, "step": 39500 }, { "epoch": 14.3, "learning_rate": 2.617626027887022e-05, "loss": 1.1443, "step": 40000 }, { "epoch": 14.3, "eval_accuracy": 0.7220978573712824, "eval_loss": 1.3189983367919922, "eval_runtime": 0.7903, "eval_samples_per_second": 632.651, "eval_steps_per_second": 40.49, "step": 40000 }, { "epoch": 14.48, "learning_rate": 2.5878322011679178e-05, "loss": 1.1356, "step": 40500 }, { "epoch": 14.48, "eval_accuracy": 0.7331329325317302, "eval_loss": 1.3287793397903442, "eval_runtime": 0.7921, "eval_samples_per_second": 631.257, "eval_steps_per_second": 40.4, "step": 40500 }, { "epoch": 14.66, "learning_rate": 2.5580383744488147e-05, "loss": 1.1493, "step": 41000 }, { "epoch": 14.66, "eval_accuracy": 0.7240227196792516, "eval_loss": 1.3504801988601685, "eval_runtime": 0.8432, "eval_samples_per_second": 592.975, "eval_steps_per_second": 37.95, "step": 41000 }, { "epoch": 14.84, "learning_rate": 2.5283041353831487e-05, "loss": 1.1417, "step": 41500 }, { "epoch": 14.84, "eval_accuracy": 0.7320369149637442, "eval_loss": 1.31459379196167, "eval_runtime": 0.8272, "eval_samples_per_second": 604.463, "eval_steps_per_second": 38.686, "step": 41500 }, { "epoch": 15.02, "learning_rate": 2.498569896317483e-05, "loss": 1.1349, "step": 42000 }, { "epoch": 15.02, "eval_accuracy": 0.7333114107201578, "eval_loss": 1.3545522689819336, "eval_runtime": 0.8634, "eval_samples_per_second": 579.106, "eval_steps_per_second": 37.063, "step": 42000 }, { "epoch": 15.19, "learning_rate": 2.4687760695983793e-05, "loss": 1.1169, "step": 42500 }, { "epoch": 15.19, "eval_accuracy": 0.7246922024623803, "eval_loss": 1.37086021900177, "eval_runtime": 0.8611, "eval_samples_per_second": 580.685, "eval_steps_per_second": 37.164, "step": 42500 }, { "epoch": 15.37, "learning_rate": 2.4390418305327136e-05, "loss": 1.1187, "step": 43000 }, { "epoch": 15.37, "eval_accuracy": 0.7217795484727756, "eval_loss": 1.4242717027664185, "eval_runtime": 0.8265, "eval_samples_per_second": 604.985, "eval_steps_per_second": 38.719, "step": 43000 }, { "epoch": 15.55, "learning_rate": 2.4092480038136102e-05, "loss": 1.118, "step": 43500 }, { "epoch": 15.55, "eval_accuracy": 0.7264245251582806, "eval_loss": 1.3835431337356567, "eval_runtime": 0.8374, "eval_samples_per_second": 597.064, "eval_steps_per_second": 38.212, "step": 43500 }, { "epoch": 15.73, "learning_rate": 2.379454177094506e-05, "loss": 1.1165, "step": 44000 }, { "epoch": 15.73, "eval_accuracy": 0.7253818654533637, "eval_loss": 1.3239895105361938, "eval_runtime": 0.8499, "eval_samples_per_second": 588.29, "eval_steps_per_second": 37.651, "step": 44000 }, { "epoch": 15.91, "learning_rate": 2.3496603503754024e-05, "loss": 1.114, "step": 44500 }, { "epoch": 15.91, "eval_accuracy": 0.7382113821138211, "eval_loss": 1.3263858556747437, "eval_runtime": 0.8424, "eval_samples_per_second": 593.546, "eval_steps_per_second": 37.987, "step": 44500 }, { "epoch": 16.09, "learning_rate": 2.3198665236562986e-05, "loss": 1.105, "step": 45000 }, { "epoch": 16.09, "eval_accuracy": 0.7333548804137039, "eval_loss": 1.3213739395141602, "eval_runtime": 0.8677, "eval_samples_per_second": 576.224, "eval_steps_per_second": 36.878, "step": 45000 }, { "epoch": 16.27, "learning_rate": 2.2900726969371946e-05, "loss": 1.0924, "step": 45500 }, { "epoch": 16.27, "eval_accuracy": 0.7282392026578073, "eval_loss": 1.384667992591858, "eval_runtime": 0.9421, "eval_samples_per_second": 530.704, "eval_steps_per_second": 33.965, "step": 45500 }, { "epoch": 16.45, "learning_rate": 2.260278870218091e-05, "loss": 1.0915, "step": 46000 }, { "epoch": 16.45, "eval_accuracy": 0.7317073170731707, "eval_loss": 1.3603721857070923, "eval_runtime": 0.7951, "eval_samples_per_second": 628.874, "eval_steps_per_second": 40.248, "step": 46000 }, { "epoch": 16.62, "learning_rate": 2.230485043498987e-05, "loss": 1.0968, "step": 46500 }, { "epoch": 16.62, "eval_accuracy": 0.7319177173191772, "eval_loss": 1.3539705276489258, "eval_runtime": 0.8815, "eval_samples_per_second": 567.187, "eval_steps_per_second": 36.3, "step": 46500 }, { "epoch": 16.8, "learning_rate": 2.2006912167798833e-05, "loss": 1.0772, "step": 47000 }, { "epoch": 16.8, "eval_accuracy": 0.7306332369013179, "eval_loss": 1.2475004196166992, "eval_runtime": 0.8301, "eval_samples_per_second": 602.308, "eval_steps_per_second": 38.548, "step": 47000 }, { "epoch": 16.98, "learning_rate": 2.1708973900607796e-05, "loss": 1.0975, "step": 47500 }, { "epoch": 16.98, "eval_accuracy": 0.7448207826372903, "eval_loss": 1.2635700702667236, "eval_runtime": 0.8269, "eval_samples_per_second": 604.655, "eval_steps_per_second": 38.698, "step": 47500 }, { "epoch": 17.16, "learning_rate": 2.1411035633416755e-05, "loss": 1.0708, "step": 48000 }, { "epoch": 17.16, "eval_accuracy": 0.7182085648904871, "eval_loss": 1.4056382179260254, "eval_runtime": 0.8973, "eval_samples_per_second": 557.236, "eval_steps_per_second": 35.663, "step": 48000 }, { "epoch": 17.34, "learning_rate": 2.111309736622572e-05, "loss": 1.0654, "step": 48500 }, { "epoch": 17.34, "eval_accuracy": 0.727630285152409, "eval_loss": 1.3769292831420898, "eval_runtime": 0.8377, "eval_samples_per_second": 596.886, "eval_steps_per_second": 38.201, "step": 48500 }, { "epoch": 17.52, "learning_rate": 2.081515909903468e-05, "loss": 1.0676, "step": 49000 }, { "epoch": 17.52, "eval_accuracy": 0.7224234441883438, "eval_loss": 1.33571457862854, "eval_runtime": 0.7909, "eval_samples_per_second": 632.166, "eval_steps_per_second": 40.459, "step": 49000 }, { "epoch": 17.7, "learning_rate": 2.0517220831843643e-05, "loss": 1.0507, "step": 49500 }, { "epoch": 17.7, "eval_accuracy": 0.712369109947644, "eval_loss": 1.4087713956832886, "eval_runtime": 0.7955, "eval_samples_per_second": 628.504, "eval_steps_per_second": 40.224, "step": 49500 }, { "epoch": 17.88, "learning_rate": 2.0219282564652605e-05, "loss": 1.0424, "step": 50000 }, { "epoch": 17.88, "eval_accuracy": 0.7314667515112949, "eval_loss": 1.3146371841430664, "eval_runtime": 0.7881, "eval_samples_per_second": 634.428, "eval_steps_per_second": 40.603, "step": 50000 }, { "epoch": 18.06, "learning_rate": 1.9921344297461568e-05, "loss": 1.0524, "step": 50500 }, { "epoch": 18.06, "eval_accuracy": 0.7393395319012503, "eval_loss": 1.28960382938385, "eval_runtime": 0.8581, "eval_samples_per_second": 582.683, "eval_steps_per_second": 37.292, "step": 50500 }, { "epoch": 18.23, "learning_rate": 1.962340603027053e-05, "loss": 1.0349, "step": 51000 }, { "epoch": 18.23, "eval_accuracy": 0.7191558441558441, "eval_loss": 1.3986730575561523, "eval_runtime": 0.7904, "eval_samples_per_second": 632.599, "eval_steps_per_second": 40.486, "step": 51000 }, { "epoch": 18.41, "learning_rate": 1.932546776307949e-05, "loss": 1.0217, "step": 51500 }, { "epoch": 18.41, "eval_accuracy": 0.7380645161290322, "eval_loss": 1.2937612533569336, "eval_runtime": 0.8575, "eval_samples_per_second": 583.089, "eval_steps_per_second": 37.318, "step": 51500 }, { "epoch": 18.59, "learning_rate": 1.9028125372422833e-05, "loss": 1.0238, "step": 52000 }, { "epoch": 18.59, "eval_accuracy": 0.738654650788542, "eval_loss": 1.296163558959961, "eval_runtime": 0.8423, "eval_samples_per_second": 593.617, "eval_steps_per_second": 37.992, "step": 52000 }, { "epoch": 18.77, "learning_rate": 1.87301871052318e-05, "loss": 1.0292, "step": 52500 }, { "epoch": 18.77, "eval_accuracy": 0.737131757850437, "eval_loss": 1.3194587230682373, "eval_runtime": 0.8232, "eval_samples_per_second": 607.358, "eval_steps_per_second": 38.871, "step": 52500 }, { "epoch": 18.95, "learning_rate": 1.8433440591109523e-05, "loss": 1.0426, "step": 53000 }, { "epoch": 18.95, "eval_accuracy": 0.7411687025420931, "eval_loss": 1.2835460901260376, "eval_runtime": 0.7859, "eval_samples_per_second": 636.221, "eval_steps_per_second": 40.718, "step": 53000 }, { "epoch": 19.13, "learning_rate": 1.8135502323918486e-05, "loss": 1.0196, "step": 53500 }, { "epoch": 19.13, "eval_accuracy": 0.747275204359673, "eval_loss": 1.234621524810791, "eval_runtime": 0.8361, "eval_samples_per_second": 597.997, "eval_steps_per_second": 38.272, "step": 53500 }, { "epoch": 19.31, "learning_rate": 1.7837564056727445e-05, "loss": 1.012, "step": 54000 }, { "epoch": 19.31, "eval_accuracy": 0.7338292367399741, "eval_loss": 1.3665757179260254, "eval_runtime": 0.8157, "eval_samples_per_second": 612.938, "eval_steps_per_second": 39.228, "step": 54000 }, { "epoch": 19.49, "learning_rate": 1.753962578953641e-05, "loss": 1.0256, "step": 54500 }, { "epoch": 19.49, "eval_accuracy": 0.7364842991259307, "eval_loss": 1.3140363693237305, "eval_runtime": 0.7949, "eval_samples_per_second": 628.974, "eval_steps_per_second": 40.254, "step": 54500 }, { "epoch": 19.66, "learning_rate": 1.724168752234537e-05, "loss": 0.9824, "step": 55000 }, { "epoch": 19.66, "eval_accuracy": 0.7416496250852079, "eval_loss": 1.2764383554458618, "eval_runtime": 0.8178, "eval_samples_per_second": 611.417, "eval_steps_per_second": 39.131, "step": 55000 }, { "epoch": 19.84, "learning_rate": 1.6943749255154336e-05, "loss": 1.0048, "step": 55500 }, { "epoch": 19.84, "eval_accuracy": 0.7487891507910881, "eval_loss": 1.2514091730117798, "eval_runtime": 0.8164, "eval_samples_per_second": 612.474, "eval_steps_per_second": 39.198, "step": 55500 }, { "epoch": 20.02, "learning_rate": 1.6645810987963295e-05, "loss": 0.9947, "step": 56000 }, { "epoch": 20.02, "eval_accuracy": 0.7431572246976448, "eval_loss": 1.3350915908813477, "eval_runtime": 0.7912, "eval_samples_per_second": 631.988, "eval_steps_per_second": 40.447, "step": 56000 }, { "epoch": 20.2, "learning_rate": 1.634846859730664e-05, "loss": 0.977, "step": 56500 }, { "epoch": 20.2, "eval_accuracy": 0.7451045469631596, "eval_loss": 1.2854044437408447, "eval_runtime": 0.8499, "eval_samples_per_second": 588.28, "eval_steps_per_second": 37.65, "step": 56500 }, { "epoch": 20.38, "learning_rate": 1.60505303301156e-05, "loss": 0.9862, "step": 57000 }, { "epoch": 20.38, "eval_accuracy": 0.7285475792988314, "eval_loss": 1.366584300994873, "eval_runtime": 0.816, "eval_samples_per_second": 612.774, "eval_steps_per_second": 39.218, "step": 57000 }, { "epoch": 20.56, "learning_rate": 1.5752592062924564e-05, "loss": 0.9699, "step": 57500 }, { "epoch": 20.56, "eval_accuracy": 0.7347811780190853, "eval_loss": 1.3123427629470825, "eval_runtime": 0.7779, "eval_samples_per_second": 642.731, "eval_steps_per_second": 41.135, "step": 57500 }, { "epoch": 20.74, "learning_rate": 1.5454653795733526e-05, "loss": 0.977, "step": 58000 }, { "epoch": 20.74, "eval_accuracy": 0.7254770672915969, "eval_loss": 1.3425793647766113, "eval_runtime": 0.8285, "eval_samples_per_second": 603.485, "eval_steps_per_second": 38.623, "step": 58000 }, { "epoch": 20.92, "learning_rate": 1.5157311405076868e-05, "loss": 0.9749, "step": 58500 }, { "epoch": 20.92, "eval_accuracy": 0.7296604740550929, "eval_loss": 1.3763371706008911, "eval_runtime": 0.7855, "eval_samples_per_second": 636.556, "eval_steps_per_second": 40.74, "step": 58500 }, { "epoch": 21.09, "learning_rate": 1.4859373137885832e-05, "loss": 0.9505, "step": 59000 }, { "epoch": 21.09, "eval_accuracy": 0.7434469200524246, "eval_loss": 1.2372225522994995, "eval_runtime": 0.7967, "eval_samples_per_second": 627.592, "eval_steps_per_second": 40.166, "step": 59000 }, { "epoch": 21.27, "learning_rate": 1.4561434870694793e-05, "loss": 0.9438, "step": 59500 }, { "epoch": 21.27, "eval_accuracy": 0.7159090909090909, "eval_loss": 1.433412790298462, "eval_runtime": 0.7929, "eval_samples_per_second": 630.567, "eval_steps_per_second": 40.356, "step": 59500 }, { "epoch": 21.45, "learning_rate": 1.4263496603503754e-05, "loss": 0.944, "step": 60000 }, { "epoch": 21.45, "eval_accuracy": 0.7507936507936508, "eval_loss": 1.269033432006836, "eval_runtime": 0.8274, "eval_samples_per_second": 604.314, "eval_steps_per_second": 38.676, "step": 60000 }, { "epoch": 21.63, "learning_rate": 1.3965558336312718e-05, "loss": 0.9427, "step": 60500 }, { "epoch": 21.63, "eval_accuracy": 0.7485941118094608, "eval_loss": 1.2185914516448975, "eval_runtime": 0.7923, "eval_samples_per_second": 631.05, "eval_steps_per_second": 40.387, "step": 60500 }, { "epoch": 21.81, "learning_rate": 1.3667620069121679e-05, "loss": 0.9553, "step": 61000 }, { "epoch": 21.81, "eval_accuracy": 0.726882430647292, "eval_loss": 1.3940554857254028, "eval_runtime": 0.7961, "eval_samples_per_second": 628.083, "eval_steps_per_second": 40.197, "step": 61000 }, { "epoch": 21.99, "learning_rate": 1.3369681801930641e-05, "loss": 0.9571, "step": 61500 }, { "epoch": 21.99, "eval_accuracy": 0.7273940607273941, "eval_loss": 1.4162867069244385, "eval_runtime": 0.791, "eval_samples_per_second": 632.128, "eval_steps_per_second": 40.456, "step": 61500 }, { "epoch": 22.17, "learning_rate": 1.3071743534739602e-05, "loss": 0.932, "step": 62000 }, { "epoch": 22.17, "eval_accuracy": 0.7522727272727273, "eval_loss": 1.2717351913452148, "eval_runtime": 0.796, "eval_samples_per_second": 628.103, "eval_steps_per_second": 40.199, "step": 62000 }, { "epoch": 22.35, "learning_rate": 1.2773805267548563e-05, "loss": 0.9166, "step": 62500 }, { "epoch": 22.35, "eval_accuracy": 0.73956326268465, "eval_loss": 1.217714786529541, "eval_runtime": 0.8289, "eval_samples_per_second": 603.185, "eval_steps_per_second": 38.604, "step": 62500 }, { "epoch": 22.52, "learning_rate": 1.2475867000357526e-05, "loss": 0.9301, "step": 63000 }, { "epoch": 22.52, "eval_accuracy": 0.7377950210151956, "eval_loss": 1.3264496326446533, "eval_runtime": 0.8524, "eval_samples_per_second": 586.56, "eval_steps_per_second": 37.54, "step": 63000 }, { "epoch": 22.7, "learning_rate": 1.2177928733166488e-05, "loss": 0.9351, "step": 63500 }, { "epoch": 22.7, "eval_accuracy": 0.752010292698617, "eval_loss": 1.2570440769195557, "eval_runtime": 0.785, "eval_samples_per_second": 636.94, "eval_steps_per_second": 40.764, "step": 63500 }, { "epoch": 22.88, "learning_rate": 1.1879990465975451e-05, "loss": 0.9211, "step": 64000 }, { "epoch": 22.88, "eval_accuracy": 0.75, "eval_loss": 1.2638896703720093, "eval_runtime": 0.8753, "eval_samples_per_second": 571.265, "eval_steps_per_second": 36.561, "step": 64000 }, { "epoch": 23.06, "learning_rate": 1.1582052198784414e-05, "loss": 0.9211, "step": 64500 }, { "epoch": 23.06, "eval_accuracy": 0.7605543022881083, "eval_loss": 1.2376515865325928, "eval_runtime": 0.7946, "eval_samples_per_second": 629.265, "eval_steps_per_second": 40.273, "step": 64500 }, { "epoch": 23.24, "learning_rate": 1.1284113931593374e-05, "loss": 0.9196, "step": 65000 }, { "epoch": 23.24, "eval_accuracy": 0.7485168094924193, "eval_loss": 1.2738728523254395, "eval_runtime": 0.8576, "eval_samples_per_second": 583.036, "eval_steps_per_second": 37.314, "step": 65000 }, { "epoch": 23.42, "learning_rate": 1.098677154093672e-05, "loss": 0.9062, "step": 65500 }, { "epoch": 23.42, "eval_accuracy": 0.7365366010964205, "eval_loss": 1.3262896537780762, "eval_runtime": 0.8401, "eval_samples_per_second": 595.164, "eval_steps_per_second": 38.09, "step": 65500 }, { "epoch": 23.6, "learning_rate": 1.068883327374568e-05, "loss": 0.8965, "step": 66000 }, { "epoch": 23.6, "eval_accuracy": 0.7455209024552091, "eval_loss": 1.2814128398895264, "eval_runtime": 0.778, "eval_samples_per_second": 642.691, "eval_steps_per_second": 41.132, "step": 66000 }, { "epoch": 23.78, "learning_rate": 1.0392086759623406e-05, "loss": 0.9004, "step": 66500 }, { "epoch": 23.78, "eval_accuracy": 0.7561779242174629, "eval_loss": 1.2108628749847412, "eval_runtime": 0.8669, "eval_samples_per_second": 576.736, "eval_steps_per_second": 36.911, "step": 66500 }, { "epoch": 23.95, "learning_rate": 1.0094148492432369e-05, "loss": 0.9094, "step": 67000 }, { "epoch": 23.95, "eval_accuracy": 0.7528089887640449, "eval_loss": 1.2629289627075195, "eval_runtime": 0.8653, "eval_samples_per_second": 577.859, "eval_steps_per_second": 36.983, "step": 67000 }, { "epoch": 24.13, "learning_rate": 9.79621022524133e-06, "loss": 0.8937, "step": 67500 }, { "epoch": 24.13, "eval_accuracy": 0.7375168690958165, "eval_loss": 1.2770532369613647, "eval_runtime": 0.8492, "eval_samples_per_second": 588.814, "eval_steps_per_second": 37.684, "step": 67500 }, { "epoch": 24.31, "learning_rate": 9.498271958050292e-06, "loss": 0.8711, "step": 68000 }, { "epoch": 24.31, "eval_accuracy": 0.7353233830845771, "eval_loss": 1.3746039867401123, "eval_runtime": 0.7929, "eval_samples_per_second": 630.629, "eval_steps_per_second": 40.36, "step": 68000 }, { "epoch": 24.49, "learning_rate": 9.200333690859255e-06, "loss": 0.8972, "step": 68500 }, { "epoch": 24.49, "eval_accuracy": 0.7453750420450723, "eval_loss": 1.2529133558273315, "eval_runtime": 0.8497, "eval_samples_per_second": 588.462, "eval_steps_per_second": 37.662, "step": 68500 }, { "epoch": 24.67, "learning_rate": 8.902395423668217e-06, "loss": 0.8863, "step": 69000 }, { "epoch": 24.67, "eval_accuracy": 0.7359154929577465, "eval_loss": 1.3219196796417236, "eval_runtime": 0.8149, "eval_samples_per_second": 613.598, "eval_steps_per_second": 39.27, "step": 69000 }, { "epoch": 24.85, "learning_rate": 8.604457156477178e-06, "loss": 0.8823, "step": 69500 }, { "epoch": 24.85, "eval_accuracy": 0.7367235275185066, "eval_loss": 1.313620924949646, "eval_runtime": 0.8311, "eval_samples_per_second": 601.621, "eval_steps_per_second": 38.504, "step": 69500 }, { "epoch": 25.03, "learning_rate": 8.306518889286139e-06, "loss": 0.8759, "step": 70000 }, { "epoch": 25.03, "eval_accuracy": 0.7427812811151676, "eval_loss": 1.3151708841323853, "eval_runtime": 0.7986, "eval_samples_per_second": 626.093, "eval_steps_per_second": 40.07, "step": 70000 }, { "epoch": 25.21, "learning_rate": 8.008580622095102e-06, "loss": 0.8722, "step": 70500 }, { "epoch": 25.21, "eval_accuracy": 0.7569644572526417, "eval_loss": 1.3108021020889282, "eval_runtime": 0.8281, "eval_samples_per_second": 603.782, "eval_steps_per_second": 38.642, "step": 70500 }, { "epoch": 25.38, "learning_rate": 7.710642354904064e-06, "loss": 0.8548, "step": 71000 }, { "epoch": 25.38, "eval_accuracy": 0.7367716008037508, "eval_loss": 1.3503183126449585, "eval_runtime": 0.7871, "eval_samples_per_second": 635.233, "eval_steps_per_second": 40.655, "step": 71000 }, { "epoch": 25.56, "learning_rate": 7.412704087713027e-06, "loss": 0.8728, "step": 71500 }, { "epoch": 25.56, "eval_accuracy": 0.7402768622280818, "eval_loss": 1.3091211318969727, "eval_runtime": 0.8581, "eval_samples_per_second": 582.712, "eval_steps_per_second": 37.294, "step": 71500 }, { "epoch": 25.74, "learning_rate": 7.114765820521989e-06, "loss": 0.8633, "step": 72000 }, { "epoch": 25.74, "eval_accuracy": 0.7416481069042317, "eval_loss": 1.2952070236206055, "eval_runtime": 0.8515, "eval_samples_per_second": 587.213, "eval_steps_per_second": 37.582, "step": 72000 }, { "epoch": 25.92, "learning_rate": 6.816827553330949e-06, "loss": 0.8612, "step": 72500 }, { "epoch": 25.92, "eval_accuracy": 0.7719072164948454, "eval_loss": 1.1612097024917603, "eval_runtime": 0.7967, "eval_samples_per_second": 627.618, "eval_steps_per_second": 40.168, "step": 72500 }, { "epoch": 26.1, "learning_rate": 6.5194851626742935e-06, "loss": 0.8677, "step": 73000 }, { "epoch": 26.1, "eval_accuracy": 0.7449731903485255, "eval_loss": 1.2855061292648315, "eval_runtime": 0.8112, "eval_samples_per_second": 616.391, "eval_steps_per_second": 39.449, "step": 73000 }, { "epoch": 26.28, "learning_rate": 6.2221427720176384e-06, "loss": 0.8526, "step": 73500 }, { "epoch": 26.28, "eval_accuracy": 0.7544929396662388, "eval_loss": 1.297914981842041, "eval_runtime": 0.8472, "eval_samples_per_second": 590.203, "eval_steps_per_second": 37.773, "step": 73500 }, { "epoch": 26.46, "learning_rate": 5.9242045048266e-06, "loss": 0.8594, "step": 74000 }, { "epoch": 26.46, "eval_accuracy": 0.7598070739549839, "eval_loss": 1.2569819688796997, "eval_runtime": 0.7923, "eval_samples_per_second": 631.066, "eval_steps_per_second": 40.388, "step": 74000 }, { "epoch": 26.64, "learning_rate": 5.626266237635562e-06, "loss": 0.8481, "step": 74500 }, { "epoch": 26.64, "eval_accuracy": 0.7491992312620115, "eval_loss": 1.2336714267730713, "eval_runtime": 0.8668, "eval_samples_per_second": 576.839, "eval_steps_per_second": 36.918, "step": 74500 }, { "epoch": 26.81, "learning_rate": 5.3283279704445245e-06, "loss": 0.855, "step": 75000 }, { "epoch": 26.81, "eval_accuracy": 0.7443507588532884, "eval_loss": 1.2874828577041626, "eval_runtime": 0.7926, "eval_samples_per_second": 630.803, "eval_steps_per_second": 40.371, "step": 75000 }, { "epoch": 26.99, "learning_rate": 5.030389703253486e-06, "loss": 0.835, "step": 75500 }, { "epoch": 26.99, "eval_accuracy": 0.7584731819677526, "eval_loss": 1.2270281314849854, "eval_runtime": 0.8172, "eval_samples_per_second": 611.826, "eval_steps_per_second": 39.157, "step": 75500 }, { "epoch": 27.17, "learning_rate": 4.732451436062448e-06, "loss": 0.8309, "step": 76000 }, { "epoch": 27.17, "eval_accuracy": 0.7389322916666666, "eval_loss": 1.2539992332458496, "eval_runtime": 0.8357, "eval_samples_per_second": 598.292, "eval_steps_per_second": 38.291, "step": 76000 }, { "epoch": 27.35, "learning_rate": 4.43451316887141e-06, "loss": 0.8326, "step": 76500 }, { "epoch": 27.35, "eval_accuracy": 0.7374631268436578, "eval_loss": 1.3610546588897705, "eval_runtime": 0.7953, "eval_samples_per_second": 628.676, "eval_steps_per_second": 40.235, "step": 76500 }, { "epoch": 27.53, "learning_rate": 4.136574901680372e-06, "loss": 0.8398, "step": 77000 }, { "epoch": 27.53, "eval_accuracy": 0.7504918032786885, "eval_loss": 1.2247506380081177, "eval_runtime": 0.859, "eval_samples_per_second": 582.099, "eval_steps_per_second": 37.254, "step": 77000 }, { "epoch": 27.71, "learning_rate": 3.838636634489334e-06, "loss": 0.8304, "step": 77500 }, { "epoch": 27.71, "eval_accuracy": 0.7607282184655396, "eval_loss": 1.2403171062469482, "eval_runtime": 0.9471, "eval_samples_per_second": 527.922, "eval_steps_per_second": 33.787, "step": 77500 }, { "epoch": 27.89, "learning_rate": 3.5406983672982957e-06, "loss": 0.8373, "step": 78000 }, { "epoch": 27.89, "eval_accuracy": 0.7611295681063123, "eval_loss": 1.1708660125732422, "eval_runtime": 0.8284, "eval_samples_per_second": 603.609, "eval_steps_per_second": 38.631, "step": 78000 }, { "epoch": 28.07, "learning_rate": 3.2427601001072583e-06, "loss": 0.8462, "step": 78500 }, { "epoch": 28.07, "eval_accuracy": 0.7508185985592666, "eval_loss": 1.289104700088501, "eval_runtime": 0.8603, "eval_samples_per_second": 581.16, "eval_steps_per_second": 37.194, "step": 78500 }, { "epoch": 28.24, "learning_rate": 2.945417709450602e-06, "loss": 0.8259, "step": 79000 }, { "epoch": 28.24, "eval_accuracy": 0.7500814597588791, "eval_loss": 1.2452012300491333, "eval_runtime": 0.8046, "eval_samples_per_second": 621.394, "eval_steps_per_second": 39.769, "step": 79000 }, { "epoch": 28.42, "learning_rate": 2.647479442259564e-06, "loss": 0.8334, "step": 79500 }, { "epoch": 28.42, "eval_accuracy": 0.746810598626104, "eval_loss": 1.2985996007919312, "eval_runtime": 0.9197, "eval_samples_per_second": 543.676, "eval_steps_per_second": 34.795, "step": 79500 }, { "epoch": 28.6, "learning_rate": 2.349541175068526e-06, "loss": 0.8115, "step": 80000 }, { "epoch": 28.6, "eval_accuracy": 0.7514638906961614, "eval_loss": 1.2879589796066284, "eval_runtime": 0.7986, "eval_samples_per_second": 626.129, "eval_steps_per_second": 40.072, "step": 80000 }, { "epoch": 28.78, "learning_rate": 2.0516029078774876e-06, "loss": 0.8205, "step": 80500 }, { "epoch": 28.78, "eval_accuracy": 0.75615359369872, "eval_loss": 1.2727956771850586, "eval_runtime": 0.8652, "eval_samples_per_second": 577.899, "eval_steps_per_second": 36.986, "step": 80500 }, { "epoch": 28.96, "learning_rate": 1.7536646406864498e-06, "loss": 0.8261, "step": 81000 }, { "epoch": 28.96, "eval_accuracy": 0.7523561910952227, "eval_loss": 1.2660555839538574, "eval_runtime": 0.7893, "eval_samples_per_second": 633.494, "eval_steps_per_second": 40.544, "step": 81000 }, { "epoch": 29.14, "learning_rate": 1.4563222500297937e-06, "loss": 0.8299, "step": 81500 }, { "epoch": 29.14, "eval_accuracy": 0.7486106570774763, "eval_loss": 1.25924813747406, "eval_runtime": 0.8513, "eval_samples_per_second": 587.342, "eval_steps_per_second": 37.59, "step": 81500 }, { "epoch": 29.32, "learning_rate": 1.1583839828387559e-06, "loss": 0.8276, "step": 82000 }, { "epoch": 29.32, "eval_accuracy": 0.7529644268774703, "eval_loss": 1.2325080633163452, "eval_runtime": 0.8587, "eval_samples_per_second": 582.291, "eval_steps_per_second": 37.267, "step": 82000 }, { "epoch": 29.5, "learning_rate": 8.604457156477178e-07, "loss": 0.8112, "step": 82500 }, { "epoch": 29.5, "eval_accuracy": 0.7477890599410416, "eval_loss": 1.3154096603393555, "eval_runtime": 0.8166, "eval_samples_per_second": 612.267, "eval_steps_per_second": 39.185, "step": 82500 }, { "epoch": 29.67, "learning_rate": 5.625074484566799e-07, "loss": 0.8111, "step": 83000 }, { "epoch": 29.67, "eval_accuracy": 0.740531561461794, "eval_loss": 1.3342524766921997, "eval_runtime": 0.8076, "eval_samples_per_second": 619.083, "eval_steps_per_second": 39.621, "step": 83000 }, { "epoch": 29.85, "learning_rate": 2.645691812656418e-07, "loss": 0.8148, "step": 83500 }, { "epoch": 29.85, "eval_accuracy": 0.7484622855292975, "eval_loss": 1.2806158065795898, "eval_runtime": 0.8122, "eval_samples_per_second": 615.596, "eval_steps_per_second": 39.398, "step": 83500 }, { "epoch": 30.0, "step": 83910, "total_flos": 3.583580261367381e+17, "train_loss": 1.1746184680817338, "train_runtime": 16410.0948, "train_samples_per_second": 163.619, "train_steps_per_second": 5.113 } ], "max_steps": 83910, "num_train_epochs": 30, "total_flos": 3.583580261367381e+17, "trial_name": null, "trial_params": null }