{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.9736303748052775, "global_step": 530000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9953078958728256e-05, "loss": 6.7877, "step": 500 }, { "epoch": 0.0, "eval_loss": 6.12216329574585, "eval_runtime": 227.3514, "eval_samples_per_second": 439.848, "eval_steps_per_second": 13.745, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.990615791745651e-05, "loss": 5.9665, "step": 1000 }, { "epoch": 0.01, "eval_loss": 5.832594394683838, "eval_runtime": 227.8781, "eval_samples_per_second": 438.831, "eval_steps_per_second": 13.713, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9859236876184755e-05, "loss": 5.7813, "step": 1500 }, { "epoch": 0.01, "eval_loss": 5.70578145980835, "eval_runtime": 227.8691, "eval_samples_per_second": 438.848, "eval_steps_per_second": 13.714, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.981231583491301e-05, "loss": 5.6811, "step": 2000 }, { "epoch": 0.02, "eval_loss": 5.598133563995361, "eval_runtime": 228.0113, "eval_samples_per_second": 438.575, "eval_steps_per_second": 13.705, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.976539479364126e-05, "loss": 5.5624, "step": 2500 }, { "epoch": 0.02, "eval_loss": 5.457843780517578, "eval_runtime": 230.6878, "eval_samples_per_second": 433.486, "eval_steps_per_second": 13.546, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.9718473752369515e-05, "loss": 5.4101, "step": 3000 }, { "epoch": 0.03, "eval_loss": 5.203133583068848, "eval_runtime": 230.8416, "eval_samples_per_second": 433.198, "eval_steps_per_second": 13.537, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.967155271109777e-05, "loss": 5.1461, "step": 3500 }, { "epoch": 0.03, "eval_loss": 4.861812591552734, "eval_runtime": 231.3953, "eval_samples_per_second": 432.161, "eval_steps_per_second": 13.505, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.962463166982602e-05, "loss": 4.8415, "step": 4000 }, { "epoch": 0.04, "eval_loss": 4.487979888916016, "eval_runtime": 231.5686, "eval_samples_per_second": 431.837, "eval_steps_per_second": 13.495, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.957771062855427e-05, "loss": 4.5424, "step": 4500 }, { "epoch": 0.04, "eval_loss": 4.209322452545166, "eval_runtime": 232.2861, "eval_samples_per_second": 430.504, "eval_steps_per_second": 13.453, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.953078958728252e-05, "loss": 4.286, "step": 5000 }, { "epoch": 0.05, "eval_loss": 3.9922587871551514, "eval_runtime": 231.4165, "eval_samples_per_second": 432.121, "eval_steps_per_second": 13.504, "step": 5000 }, { "epoch": 0.05, "learning_rate": 4.9483868546010775e-05, "loss": 4.0996, "step": 5500 }, { "epoch": 0.05, "eval_loss": 3.8145012855529785, "eval_runtime": 230.7704, "eval_samples_per_second": 433.331, "eval_steps_per_second": 13.542, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.943694750473903e-05, "loss": 3.9396, "step": 6000 }, { "epoch": 0.06, "eval_loss": 3.652764320373535, "eval_runtime": 230.0751, "eval_samples_per_second": 434.641, "eval_steps_per_second": 13.583, "step": 6000 }, { "epoch": 0.06, "learning_rate": 4.939002646346728e-05, "loss": 3.8204, "step": 6500 }, { "epoch": 0.06, "eval_loss": 3.5459558963775635, "eval_runtime": 227.8247, "eval_samples_per_second": 438.934, "eval_steps_per_second": 13.717, "step": 6500 }, { "epoch": 0.07, "learning_rate": 4.934310542219553e-05, "loss": 3.7161, "step": 7000 }, { "epoch": 0.07, "eval_loss": 3.432518720626831, "eval_runtime": 228.0814, "eval_samples_per_second": 438.44, "eval_steps_per_second": 13.701, "step": 7000 }, { "epoch": 0.07, "learning_rate": 4.929618438092378e-05, "loss": 3.602, "step": 7500 }, { "epoch": 0.07, "eval_loss": 3.3445019721984863, "eval_runtime": 227.7955, "eval_samples_per_second": 438.99, "eval_steps_per_second": 13.718, "step": 7500 }, { "epoch": 0.08, "learning_rate": 4.9249263339652035e-05, "loss": 3.519, "step": 8000 }, { "epoch": 0.08, "eval_loss": 3.2647743225097656, "eval_runtime": 227.9434, "eval_samples_per_second": 438.705, "eval_steps_per_second": 13.71, "step": 8000 }, { "epoch": 0.08, "learning_rate": 4.920234229838029e-05, "loss": 3.4347, "step": 8500 }, { "epoch": 0.08, "eval_loss": 3.188307762145996, "eval_runtime": 228.5169, "eval_samples_per_second": 437.604, "eval_steps_per_second": 13.675, "step": 8500 }, { "epoch": 0.08, "learning_rate": 4.915542125710854e-05, "loss": 3.3642, "step": 9000 }, { "epoch": 0.08, "eval_loss": 3.1089322566986084, "eval_runtime": 228.7192, "eval_samples_per_second": 437.217, "eval_steps_per_second": 13.663, "step": 9000 }, { "epoch": 0.09, "learning_rate": 4.9108500215836794e-05, "loss": 3.29, "step": 9500 }, { "epoch": 0.09, "eval_loss": 3.044532299041748, "eval_runtime": 229.7935, "eval_samples_per_second": 435.173, "eval_steps_per_second": 13.599, "step": 9500 }, { "epoch": 0.09, "learning_rate": 4.906157917456504e-05, "loss": 3.2545, "step": 10000 }, { "epoch": 0.09, "eval_loss": 3.0254080295562744, "eval_runtime": 229.6489, "eval_samples_per_second": 435.447, "eval_steps_per_second": 13.608, "step": 10000 }, { "epoch": 0.1, "learning_rate": 4.9014658133293294e-05, "loss": 3.2036, "step": 10500 }, { "epoch": 0.1, "eval_loss": 2.9655680656433105, "eval_runtime": 228.7959, "eval_samples_per_second": 437.071, "eval_steps_per_second": 13.658, "step": 10500 }, { "epoch": 0.1, "learning_rate": 4.896773709202155e-05, "loss": 3.1292, "step": 11000 }, { "epoch": 0.1, "eval_loss": 2.92611026763916, "eval_runtime": 224.7807, "eval_samples_per_second": 444.878, "eval_steps_per_second": 13.902, "step": 11000 }, { "epoch": 0.11, "learning_rate": 4.89208160507498e-05, "loss": 3.1273, "step": 11500 }, { "epoch": 0.11, "eval_loss": 2.880321502685547, "eval_runtime": 225.1803, "eval_samples_per_second": 444.089, "eval_steps_per_second": 13.878, "step": 11500 }, { "epoch": 0.11, "learning_rate": 4.8873895009478054e-05, "loss": 3.0435, "step": 12000 }, { "epoch": 0.11, "eval_loss": 2.8387928009033203, "eval_runtime": 229.766, "eval_samples_per_second": 435.225, "eval_steps_per_second": 13.601, "step": 12000 }, { "epoch": 0.12, "learning_rate": 4.88269739682063e-05, "loss": 2.9999, "step": 12500 }, { "epoch": 0.12, "eval_loss": 2.7962071895599365, "eval_runtime": 226.0122, "eval_samples_per_second": 442.454, "eval_steps_per_second": 13.827, "step": 12500 }, { "epoch": 0.12, "learning_rate": 4.8780052926934554e-05, "loss": 2.9642, "step": 13000 }, { "epoch": 0.12, "eval_loss": 2.7659239768981934, "eval_runtime": 225.1957, "eval_samples_per_second": 444.058, "eval_steps_per_second": 13.877, "step": 13000 }, { "epoch": 0.13, "learning_rate": 4.873313188566281e-05, "loss": 2.9657, "step": 13500 }, { "epoch": 0.13, "eval_loss": 2.729302167892456, "eval_runtime": 225.23, "eval_samples_per_second": 443.99, "eval_steps_per_second": 13.875, "step": 13500 }, { "epoch": 0.13, "learning_rate": 4.868621084439106e-05, "loss": 2.9025, "step": 14000 }, { "epoch": 0.13, "eval_loss": 2.698721170425415, "eval_runtime": 225.2627, "eval_samples_per_second": 443.926, "eval_steps_per_second": 13.873, "step": 14000 }, { "epoch": 0.14, "learning_rate": 4.8639289803119314e-05, "loss": 2.9036, "step": 14500 }, { "epoch": 0.14, "eval_loss": 2.668630361557007, "eval_runtime": 225.2216, "eval_samples_per_second": 444.007, "eval_steps_per_second": 13.875, "step": 14500 }, { "epoch": 0.14, "learning_rate": 4.859236876184757e-05, "loss": 2.8405, "step": 15000 }, { "epoch": 0.14, "eval_loss": 2.6363720893859863, "eval_runtime": 226.5807, "eval_samples_per_second": 441.344, "eval_steps_per_second": 13.792, "step": 15000 }, { "epoch": 0.15, "learning_rate": 4.8545447720575814e-05, "loss": 2.8067, "step": 15500 }, { "epoch": 0.15, "eval_loss": 2.6151633262634277, "eval_runtime": 230.6298, "eval_samples_per_second": 433.595, "eval_steps_per_second": 13.55, "step": 15500 }, { "epoch": 0.15, "learning_rate": 4.849852667930407e-05, "loss": 2.7782, "step": 16000 }, { "epoch": 0.15, "eval_loss": 2.586013078689575, "eval_runtime": 237.3092, "eval_samples_per_second": 421.391, "eval_steps_per_second": 13.168, "step": 16000 }, { "epoch": 0.15, "learning_rate": 4.845160563803232e-05, "loss": 2.7647, "step": 16500 }, { "epoch": 0.15, "eval_loss": 2.5545260906219482, "eval_runtime": 239.3104, "eval_samples_per_second": 417.867, "eval_steps_per_second": 13.058, "step": 16500 }, { "epoch": 0.16, "learning_rate": 4.8404684596760573e-05, "loss": 2.7115, "step": 17000 }, { "epoch": 0.16, "eval_loss": 2.5350353717803955, "eval_runtime": 242.6234, "eval_samples_per_second": 412.161, "eval_steps_per_second": 12.88, "step": 17000 }, { "epoch": 0.16, "learning_rate": 4.835776355548883e-05, "loss": 2.6993, "step": 17500 }, { "epoch": 0.16, "eval_loss": 2.502089738845825, "eval_runtime": 236.8183, "eval_samples_per_second": 422.265, "eval_steps_per_second": 13.196, "step": 17500 }, { "epoch": 0.17, "learning_rate": 4.831084251421707e-05, "loss": 2.7097, "step": 18000 }, { "epoch": 0.17, "eval_loss": 2.493856191635132, "eval_runtime": 236.3441, "eval_samples_per_second": 423.112, "eval_steps_per_second": 13.222, "step": 18000 }, { "epoch": 0.17, "learning_rate": 4.8263921472945327e-05, "loss": 2.6829, "step": 18500 }, { "epoch": 0.17, "eval_loss": 2.484680652618408, "eval_runtime": 234.9827, "eval_samples_per_second": 425.563, "eval_steps_per_second": 13.299, "step": 18500 }, { "epoch": 0.18, "learning_rate": 4.821700043167358e-05, "loss": 2.6755, "step": 19000 }, { "epoch": 0.18, "eval_loss": 2.461320161819458, "eval_runtime": 233.7755, "eval_samples_per_second": 427.761, "eval_steps_per_second": 13.368, "step": 19000 }, { "epoch": 0.18, "learning_rate": 4.817007939040183e-05, "loss": 2.6365, "step": 19500 }, { "epoch": 0.18, "eval_loss": 2.435065269470215, "eval_runtime": 238.7041, "eval_samples_per_second": 418.929, "eval_steps_per_second": 13.092, "step": 19500 }, { "epoch": 0.19, "learning_rate": 4.8123158349130086e-05, "loss": 2.6216, "step": 20000 }, { "epoch": 0.19, "eval_loss": 2.4164962768554688, "eval_runtime": 234.7619, "eval_samples_per_second": 425.964, "eval_steps_per_second": 13.311, "step": 20000 }, { "epoch": 0.19, "learning_rate": 4.807623730785834e-05, "loss": 2.6014, "step": 20500 }, { "epoch": 0.19, "eval_loss": 2.402653932571411, "eval_runtime": 234.6149, "eval_samples_per_second": 426.23, "eval_steps_per_second": 13.32, "step": 20500 }, { "epoch": 0.2, "learning_rate": 4.8029316266586586e-05, "loss": 2.5833, "step": 21000 }, { "epoch": 0.2, "eval_loss": 2.3744804859161377, "eval_runtime": 231.112, "eval_samples_per_second": 432.691, "eval_steps_per_second": 13.522, "step": 21000 }, { "epoch": 0.2, "learning_rate": 4.798239522531484e-05, "loss": 2.5663, "step": 21500 }, { "epoch": 0.2, "eval_loss": 2.35831880569458, "eval_runtime": 233.0868, "eval_samples_per_second": 429.025, "eval_steps_per_second": 13.407, "step": 21500 }, { "epoch": 0.21, "learning_rate": 4.793547418404309e-05, "loss": 2.5626, "step": 22000 }, { "epoch": 0.21, "eval_loss": 2.3559041023254395, "eval_runtime": 234.7406, "eval_samples_per_second": 426.002, "eval_steps_per_second": 13.313, "step": 22000 }, { "epoch": 0.21, "learning_rate": 4.7888553142771346e-05, "loss": 2.5453, "step": 22500 }, { "epoch": 0.21, "eval_loss": 2.3476722240448, "eval_runtime": 231.9484, "eval_samples_per_second": 431.13, "eval_steps_per_second": 13.473, "step": 22500 }, { "epoch": 0.22, "learning_rate": 4.78416321014996e-05, "loss": 2.492, "step": 23000 }, { "epoch": 0.22, "eval_loss": 2.3358097076416016, "eval_runtime": 231.7601, "eval_samples_per_second": 431.481, "eval_steps_per_second": 13.484, "step": 23000 }, { "epoch": 0.22, "learning_rate": 4.779471106022785e-05, "loss": 2.4961, "step": 23500 }, { "epoch": 0.22, "eval_loss": 2.31193208694458, "eval_runtime": 233.823, "eval_samples_per_second": 427.674, "eval_steps_per_second": 13.365, "step": 23500 }, { "epoch": 0.23, "learning_rate": 4.77477900189561e-05, "loss": 2.4764, "step": 24000 }, { "epoch": 0.23, "eval_loss": 2.2874138355255127, "eval_runtime": 232.1465, "eval_samples_per_second": 430.762, "eval_steps_per_second": 13.461, "step": 24000 }, { "epoch": 0.23, "learning_rate": 4.770086897768435e-05, "loss": 2.4725, "step": 24500 }, { "epoch": 0.23, "eval_loss": 2.2882509231567383, "eval_runtime": 233.6105, "eval_samples_per_second": 428.063, "eval_steps_per_second": 13.377, "step": 24500 }, { "epoch": 0.23, "learning_rate": 4.7653947936412606e-05, "loss": 2.4483, "step": 25000 }, { "epoch": 0.23, "eval_loss": 2.2681896686553955, "eval_runtime": 230.7325, "eval_samples_per_second": 433.402, "eval_steps_per_second": 13.544, "step": 25000 }, { "epoch": 0.24, "learning_rate": 4.760702689514086e-05, "loss": 2.4408, "step": 25500 }, { "epoch": 0.24, "eval_loss": 2.2558085918426514, "eval_runtime": 231.5715, "eval_samples_per_second": 431.832, "eval_steps_per_second": 13.495, "step": 25500 }, { "epoch": 0.24, "learning_rate": 4.756010585386911e-05, "loss": 2.4344, "step": 26000 }, { "epoch": 0.24, "eval_loss": 2.2374377250671387, "eval_runtime": 233.7469, "eval_samples_per_second": 427.813, "eval_steps_per_second": 13.369, "step": 26000 }, { "epoch": 0.25, "learning_rate": 4.751318481259736e-05, "loss": 2.4257, "step": 26500 }, { "epoch": 0.25, "eval_loss": 2.229942560195923, "eval_runtime": 231.8416, "eval_samples_per_second": 431.329, "eval_steps_per_second": 13.479, "step": 26500 }, { "epoch": 0.25, "learning_rate": 4.746626377132561e-05, "loss": 2.3836, "step": 27000 }, { "epoch": 0.25, "eval_loss": 2.2122104167938232, "eval_runtime": 232.1939, "eval_samples_per_second": 430.675, "eval_steps_per_second": 13.459, "step": 27000 }, { "epoch": 0.26, "learning_rate": 4.7419342730053865e-05, "loss": 2.3901, "step": 27500 }, { "epoch": 0.26, "eval_loss": 2.1925010681152344, "eval_runtime": 228.7288, "eval_samples_per_second": 437.199, "eval_steps_per_second": 13.662, "step": 27500 }, { "epoch": 0.26, "learning_rate": 4.737242168878212e-05, "loss": 2.3796, "step": 28000 }, { "epoch": 0.26, "eval_loss": 2.195193290710449, "eval_runtime": 227.6921, "eval_samples_per_second": 439.19, "eval_steps_per_second": 13.725, "step": 28000 }, { "epoch": 0.27, "learning_rate": 4.732550064751037e-05, "loss": 2.3792, "step": 28500 }, { "epoch": 0.27, "eval_loss": 2.178008794784546, "eval_runtime": 227.653, "eval_samples_per_second": 439.265, "eval_steps_per_second": 13.727, "step": 28500 }, { "epoch": 0.27, "learning_rate": 4.7278579606238625e-05, "loss": 2.3715, "step": 29000 }, { "epoch": 0.27, "eval_loss": 2.175689220428467, "eval_runtime": 227.4287, "eval_samples_per_second": 439.698, "eval_steps_per_second": 13.741, "step": 29000 }, { "epoch": 0.28, "learning_rate": 4.723165856496687e-05, "loss": 2.387, "step": 29500 }, { "epoch": 0.28, "eval_loss": 2.17189884185791, "eval_runtime": 227.5833, "eval_samples_per_second": 439.399, "eval_steps_per_second": 13.731, "step": 29500 }, { "epoch": 0.28, "learning_rate": 4.7184737523695125e-05, "loss": 2.3689, "step": 30000 }, { "epoch": 0.28, "eval_loss": 2.1487345695495605, "eval_runtime": 227.5611, "eval_samples_per_second": 439.442, "eval_steps_per_second": 13.733, "step": 30000 }, { "epoch": 0.29, "learning_rate": 4.713781648242338e-05, "loss": 2.3086, "step": 30500 }, { "epoch": 0.29, "eval_loss": 2.1480958461761475, "eval_runtime": 227.4844, "eval_samples_per_second": 439.591, "eval_steps_per_second": 13.737, "step": 30500 }, { "epoch": 0.29, "learning_rate": 4.709089544115163e-05, "loss": 2.3212, "step": 31000 }, { "epoch": 0.29, "eval_loss": 2.140958547592163, "eval_runtime": 227.7332, "eval_samples_per_second": 439.11, "eval_steps_per_second": 13.722, "step": 31000 }, { "epoch": 0.3, "learning_rate": 4.7043974399879885e-05, "loss": 2.321, "step": 31500 }, { "epoch": 0.3, "eval_loss": 2.1231191158294678, "eval_runtime": 227.7051, "eval_samples_per_second": 439.164, "eval_steps_per_second": 13.724, "step": 31500 }, { "epoch": 0.3, "learning_rate": 4.699705335860813e-05, "loss": 2.2855, "step": 32000 }, { "epoch": 0.3, "eval_loss": 2.1169111728668213, "eval_runtime": 227.254, "eval_samples_per_second": 440.036, "eval_steps_per_second": 13.751, "step": 32000 }, { "epoch": 0.3, "learning_rate": 4.6950132317336385e-05, "loss": 2.2906, "step": 32500 }, { "epoch": 0.3, "eval_loss": 2.104003667831421, "eval_runtime": 227.1649, "eval_samples_per_second": 440.209, "eval_steps_per_second": 13.757, "step": 32500 }, { "epoch": 0.31, "learning_rate": 4.690321127606464e-05, "loss": 2.2771, "step": 33000 }, { "epoch": 0.31, "eval_loss": 2.0972249507904053, "eval_runtime": 227.2504, "eval_samples_per_second": 440.043, "eval_steps_per_second": 13.751, "step": 33000 }, { "epoch": 0.31, "learning_rate": 4.685629023479289e-05, "loss": 2.2485, "step": 33500 }, { "epoch": 0.31, "eval_loss": 2.074732780456543, "eval_runtime": 227.2974, "eval_samples_per_second": 439.952, "eval_steps_per_second": 13.749, "step": 33500 }, { "epoch": 0.32, "learning_rate": 4.6809369193521144e-05, "loss": 2.2783, "step": 34000 }, { "epoch": 0.32, "eval_loss": 2.077274799346924, "eval_runtime": 227.3063, "eval_samples_per_second": 439.935, "eval_steps_per_second": 13.748, "step": 34000 }, { "epoch": 0.32, "learning_rate": 4.67624481522494e-05, "loss": 2.2668, "step": 34500 }, { "epoch": 0.32, "eval_loss": 2.075364828109741, "eval_runtime": 227.2985, "eval_samples_per_second": 439.95, "eval_steps_per_second": 13.748, "step": 34500 }, { "epoch": 0.33, "learning_rate": 4.6715527110977644e-05, "loss": 2.2458, "step": 35000 }, { "epoch": 0.33, "eval_loss": 2.060246467590332, "eval_runtime": 227.0553, "eval_samples_per_second": 440.421, "eval_steps_per_second": 13.763, "step": 35000 }, { "epoch": 0.33, "learning_rate": 4.66686060697059e-05, "loss": 2.2365, "step": 35500 }, { "epoch": 0.33, "eval_loss": 2.046921968460083, "eval_runtime": 227.3148, "eval_samples_per_second": 439.919, "eval_steps_per_second": 13.747, "step": 35500 }, { "epoch": 0.34, "learning_rate": 4.662168502843415e-05, "loss": 2.2652, "step": 36000 }, { "epoch": 0.34, "eval_loss": 2.049044370651245, "eval_runtime": 227.288, "eval_samples_per_second": 439.97, "eval_steps_per_second": 13.749, "step": 36000 }, { "epoch": 0.34, "learning_rate": 4.6574763987162404e-05, "loss": 2.2276, "step": 36500 }, { "epoch": 0.34, "eval_loss": 2.0362257957458496, "eval_runtime": 227.1732, "eval_samples_per_second": 440.193, "eval_steps_per_second": 13.756, "step": 36500 }, { "epoch": 0.35, "learning_rate": 4.652784294589066e-05, "loss": 2.226, "step": 37000 }, { "epoch": 0.35, "eval_loss": 2.0287837982177734, "eval_runtime": 227.1386, "eval_samples_per_second": 440.26, "eval_steps_per_second": 13.758, "step": 37000 }, { "epoch": 0.35, "learning_rate": 4.6480921904618904e-05, "loss": 2.2358, "step": 37500 }, { "epoch": 0.35, "eval_loss": 2.016014337539673, "eval_runtime": 227.3457, "eval_samples_per_second": 439.859, "eval_steps_per_second": 13.746, "step": 37500 }, { "epoch": 0.36, "learning_rate": 4.643400086334716e-05, "loss": 2.1975, "step": 38000 }, { "epoch": 0.36, "eval_loss": 2.0150697231292725, "eval_runtime": 227.4193, "eval_samples_per_second": 439.716, "eval_steps_per_second": 13.741, "step": 38000 }, { "epoch": 0.36, "learning_rate": 4.638707982207541e-05, "loss": 2.1991, "step": 38500 }, { "epoch": 0.36, "eval_loss": 2.007798910140991, "eval_runtime": 227.3815, "eval_samples_per_second": 439.789, "eval_steps_per_second": 13.743, "step": 38500 }, { "epoch": 0.37, "learning_rate": 4.6340158780803664e-05, "loss": 2.1907, "step": 39000 }, { "epoch": 0.37, "eval_loss": 2.0014853477478027, "eval_runtime": 227.3276, "eval_samples_per_second": 439.894, "eval_steps_per_second": 13.747, "step": 39000 }, { "epoch": 0.37, "learning_rate": 4.629323773953192e-05, "loss": 2.1551, "step": 39500 }, { "epoch": 0.37, "eval_loss": 2.0021867752075195, "eval_runtime": 227.2631, "eval_samples_per_second": 440.019, "eval_steps_per_second": 13.751, "step": 39500 }, { "epoch": 0.38, "learning_rate": 4.624631669826017e-05, "loss": 2.1818, "step": 40000 }, { "epoch": 0.38, "eval_loss": 1.9883029460906982, "eval_runtime": 227.0668, "eval_samples_per_second": 440.399, "eval_steps_per_second": 13.762, "step": 40000 }, { "epoch": 0.38, "learning_rate": 4.619939565698842e-05, "loss": 2.1678, "step": 40500 }, { "epoch": 0.38, "eval_loss": 1.9864903688430786, "eval_runtime": 226.6949, "eval_samples_per_second": 441.122, "eval_steps_per_second": 13.785, "step": 40500 }, { "epoch": 0.38, "learning_rate": 4.615247461571667e-05, "loss": 2.173, "step": 41000 }, { "epoch": 0.38, "eval_loss": 1.9766247272491455, "eval_runtime": 226.7708, "eval_samples_per_second": 440.974, "eval_steps_per_second": 13.78, "step": 41000 }, { "epoch": 0.39, "learning_rate": 4.6105553574444923e-05, "loss": 2.1469, "step": 41500 }, { "epoch": 0.39, "eval_loss": 1.962270975112915, "eval_runtime": 227.1551, "eval_samples_per_second": 440.228, "eval_steps_per_second": 13.757, "step": 41500 }, { "epoch": 0.39, "learning_rate": 4.605863253317318e-05, "loss": 2.1414, "step": 42000 }, { "epoch": 0.39, "eval_loss": 1.9634250402450562, "eval_runtime": 227.1053, "eval_samples_per_second": 440.324, "eval_steps_per_second": 13.76, "step": 42000 }, { "epoch": 0.4, "learning_rate": 4.601171149190143e-05, "loss": 2.156, "step": 42500 }, { "epoch": 0.4, "eval_loss": 1.9548449516296387, "eval_runtime": 226.8657, "eval_samples_per_second": 440.789, "eval_steps_per_second": 13.775, "step": 42500 }, { "epoch": 0.4, "learning_rate": 4.596479045062968e-05, "loss": 2.1155, "step": 43000 }, { "epoch": 0.4, "eval_loss": 1.9472512006759644, "eval_runtime": 227.1058, "eval_samples_per_second": 440.323, "eval_steps_per_second": 13.76, "step": 43000 }, { "epoch": 0.41, "learning_rate": 4.591786940935793e-05, "loss": 2.114, "step": 43500 }, { "epoch": 0.41, "eval_loss": 1.943420171737671, "eval_runtime": 227.1581, "eval_samples_per_second": 440.222, "eval_steps_per_second": 13.757, "step": 43500 }, { "epoch": 0.41, "learning_rate": 4.587094836808618e-05, "loss": 2.1146, "step": 44000 }, { "epoch": 0.41, "eval_loss": 1.9389326572418213, "eval_runtime": 227.1632, "eval_samples_per_second": 440.212, "eval_steps_per_second": 13.757, "step": 44000 }, { "epoch": 0.42, "learning_rate": 4.5824027326814436e-05, "loss": 2.1018, "step": 44500 }, { "epoch": 0.42, "eval_loss": 1.9317586421966553, "eval_runtime": 227.2543, "eval_samples_per_second": 440.036, "eval_steps_per_second": 13.751, "step": 44500 }, { "epoch": 0.42, "learning_rate": 4.577710628554269e-05, "loss": 2.0955, "step": 45000 }, { "epoch": 0.42, "eval_loss": 1.9257296323776245, "eval_runtime": 227.0518, "eval_samples_per_second": 440.428, "eval_steps_per_second": 13.763, "step": 45000 }, { "epoch": 0.43, "learning_rate": 4.573018524427094e-05, "loss": 2.0788, "step": 45500 }, { "epoch": 0.43, "eval_loss": 1.9265824556350708, "eval_runtime": 226.9844, "eval_samples_per_second": 440.559, "eval_steps_per_second": 13.767, "step": 45500 }, { "epoch": 0.43, "learning_rate": 4.5683264202999196e-05, "loss": 2.0983, "step": 46000 }, { "epoch": 0.43, "eval_loss": 1.9132778644561768, "eval_runtime": 226.9226, "eval_samples_per_second": 440.679, "eval_steps_per_second": 13.771, "step": 46000 }, { "epoch": 0.44, "learning_rate": 4.563634316172745e-05, "loss": 2.0877, "step": 46500 }, { "epoch": 0.44, "eval_loss": 1.9130772352218628, "eval_runtime": 227.0233, "eval_samples_per_second": 440.483, "eval_steps_per_second": 13.765, "step": 46500 }, { "epoch": 0.44, "learning_rate": 4.5589422120455696e-05, "loss": 2.0831, "step": 47000 }, { "epoch": 0.44, "eval_loss": 1.9047877788543701, "eval_runtime": 227.0434, "eval_samples_per_second": 440.444, "eval_steps_per_second": 13.764, "step": 47000 }, { "epoch": 0.45, "learning_rate": 4.554250107918395e-05, "loss": 2.0843, "step": 47500 }, { "epoch": 0.45, "eval_loss": 1.9129490852355957, "eval_runtime": 226.7451, "eval_samples_per_second": 441.024, "eval_steps_per_second": 13.782, "step": 47500 }, { "epoch": 0.45, "learning_rate": 4.54955800379122e-05, "loss": 2.0583, "step": 48000 }, { "epoch": 0.45, "eval_loss": 1.8967323303222656, "eval_runtime": 226.9943, "eval_samples_per_second": 440.54, "eval_steps_per_second": 13.767, "step": 48000 }, { "epoch": 0.46, "learning_rate": 4.5448658996640456e-05, "loss": 2.0814, "step": 48500 }, { "epoch": 0.46, "eval_loss": 1.8939955234527588, "eval_runtime": 226.7956, "eval_samples_per_second": 440.926, "eval_steps_per_second": 13.779, "step": 48500 }, { "epoch": 0.46, "learning_rate": 4.540173795536871e-05, "loss": 2.0656, "step": 49000 }, { "epoch": 0.46, "eval_loss": 1.8772351741790771, "eval_runtime": 226.9522, "eval_samples_per_second": 440.621, "eval_steps_per_second": 13.769, "step": 49000 }, { "epoch": 0.46, "learning_rate": 4.535481691409696e-05, "loss": 2.0654, "step": 49500 }, { "epoch": 0.46, "eval_loss": 1.8778467178344727, "eval_runtime": 227.1023, "eval_samples_per_second": 440.33, "eval_steps_per_second": 13.76, "step": 49500 }, { "epoch": 0.47, "learning_rate": 4.5307895872825216e-05, "loss": 2.0695, "step": 50000 }, { "epoch": 0.47, "eval_loss": 1.877779245376587, "eval_runtime": 226.8595, "eval_samples_per_second": 440.801, "eval_steps_per_second": 13.775, "step": 50000 }, { "epoch": 0.47, "learning_rate": 4.526097483155346e-05, "loss": 2.0571, "step": 50500 }, { "epoch": 0.47, "eval_loss": 1.878071904182434, "eval_runtime": 226.8228, "eval_samples_per_second": 440.873, "eval_steps_per_second": 13.777, "step": 50500 }, { "epoch": 0.48, "learning_rate": 4.5214053790281715e-05, "loss": 2.0379, "step": 51000 }, { "epoch": 0.48, "eval_loss": 1.8661781549453735, "eval_runtime": 226.9895, "eval_samples_per_second": 440.549, "eval_steps_per_second": 13.767, "step": 51000 }, { "epoch": 0.48, "learning_rate": 4.516713274900997e-05, "loss": 2.0439, "step": 51500 }, { "epoch": 0.48, "eval_loss": 1.8483119010925293, "eval_runtime": 227.0054, "eval_samples_per_second": 440.518, "eval_steps_per_second": 13.766, "step": 51500 }, { "epoch": 0.49, "learning_rate": 4.512021170773822e-05, "loss": 2.0472, "step": 52000 }, { "epoch": 0.49, "eval_loss": 1.8448209762573242, "eval_runtime": 226.9827, "eval_samples_per_second": 440.562, "eval_steps_per_second": 13.768, "step": 52000 }, { "epoch": 0.49, "learning_rate": 4.5073290666466475e-05, "loss": 2.0281, "step": 52500 }, { "epoch": 0.49, "eval_loss": 1.8498718738555908, "eval_runtime": 227.1635, "eval_samples_per_second": 440.212, "eval_steps_per_second": 13.757, "step": 52500 }, { "epoch": 0.5, "learning_rate": 4.502636962519473e-05, "loss": 2.0077, "step": 53000 }, { "epoch": 0.5, "eval_loss": 1.848965048789978, "eval_runtime": 226.7848, "eval_samples_per_second": 440.947, "eval_steps_per_second": 13.78, "step": 53000 }, { "epoch": 0.5, "learning_rate": 4.4979448583922975e-05, "loss": 2.0407, "step": 53500 }, { "epoch": 0.5, "eval_loss": 1.8440462350845337, "eval_runtime": 227.0578, "eval_samples_per_second": 440.416, "eval_steps_per_second": 13.763, "step": 53500 }, { "epoch": 0.51, "learning_rate": 4.493252754265123e-05, "loss": 2.0314, "step": 54000 }, { "epoch": 0.51, "eval_loss": 1.8299942016601562, "eval_runtime": 226.8309, "eval_samples_per_second": 440.857, "eval_steps_per_second": 13.777, "step": 54000 }, { "epoch": 0.51, "learning_rate": 4.488560650137948e-05, "loss": 2.0178, "step": 54500 }, { "epoch": 0.51, "eval_loss": 1.8236579895019531, "eval_runtime": 226.9072, "eval_samples_per_second": 440.709, "eval_steps_per_second": 13.772, "step": 54500 }, { "epoch": 0.52, "learning_rate": 4.4838685460107735e-05, "loss": 2.0133, "step": 55000 }, { "epoch": 0.52, "eval_loss": 1.826443076133728, "eval_runtime": 226.9793, "eval_samples_per_second": 440.569, "eval_steps_per_second": 13.768, "step": 55000 }, { "epoch": 0.52, "learning_rate": 4.479176441883599e-05, "loss": 2.0298, "step": 55500 }, { "epoch": 0.52, "eval_loss": 1.816024661064148, "eval_runtime": 226.7364, "eval_samples_per_second": 441.041, "eval_steps_per_second": 13.783, "step": 55500 }, { "epoch": 0.53, "learning_rate": 4.474484337756424e-05, "loss": 1.9824, "step": 56000 }, { "epoch": 0.53, "eval_loss": 1.8234096765518188, "eval_runtime": 226.9723, "eval_samples_per_second": 440.582, "eval_steps_per_second": 13.768, "step": 56000 }, { "epoch": 0.53, "learning_rate": 4.4697922336292495e-05, "loss": 1.9908, "step": 56500 }, { "epoch": 0.53, "eval_loss": 1.8151246309280396, "eval_runtime": 226.9337, "eval_samples_per_second": 440.657, "eval_steps_per_second": 13.771, "step": 56500 }, { "epoch": 0.53, "learning_rate": 4.465100129502074e-05, "loss": 1.9782, "step": 57000 }, { "epoch": 0.53, "eval_loss": 1.808193564414978, "eval_runtime": 226.7205, "eval_samples_per_second": 441.072, "eval_steps_per_second": 13.783, "step": 57000 }, { "epoch": 0.54, "learning_rate": 4.4604080253748995e-05, "loss": 1.9985, "step": 57500 }, { "epoch": 0.54, "eval_loss": 1.7979633808135986, "eval_runtime": 226.818, "eval_samples_per_second": 440.882, "eval_steps_per_second": 13.778, "step": 57500 }, { "epoch": 0.54, "learning_rate": 4.455715921247725e-05, "loss": 1.978, "step": 58000 }, { "epoch": 0.54, "eval_loss": 1.7955108880996704, "eval_runtime": 226.7306, "eval_samples_per_second": 441.052, "eval_steps_per_second": 13.783, "step": 58000 }, { "epoch": 0.55, "learning_rate": 4.45102381712055e-05, "loss": 1.9746, "step": 58500 }, { "epoch": 0.55, "eval_loss": 1.794771671295166, "eval_runtime": 226.9956, "eval_samples_per_second": 440.537, "eval_steps_per_second": 13.767, "step": 58500 }, { "epoch": 0.55, "learning_rate": 4.4463317129933754e-05, "loss": 1.9838, "step": 59000 }, { "epoch": 0.55, "eval_loss": 1.7896528244018555, "eval_runtime": 226.9957, "eval_samples_per_second": 440.537, "eval_steps_per_second": 13.767, "step": 59000 }, { "epoch": 0.56, "learning_rate": 4.4416396088662e-05, "loss": 1.944, "step": 59500 }, { "epoch": 0.56, "eval_loss": 1.7889646291732788, "eval_runtime": 227.1746, "eval_samples_per_second": 440.19, "eval_steps_per_second": 13.756, "step": 59500 }, { "epoch": 0.56, "learning_rate": 4.4369475047390254e-05, "loss": 1.965, "step": 60000 }, { "epoch": 0.56, "eval_loss": 1.7891310453414917, "eval_runtime": 227.2101, "eval_samples_per_second": 440.121, "eval_steps_per_second": 13.754, "step": 60000 }, { "epoch": 0.57, "learning_rate": 4.432255400611851e-05, "loss": 1.9501, "step": 60500 }, { "epoch": 0.57, "eval_loss": 1.7808892726898193, "eval_runtime": 226.9599, "eval_samples_per_second": 440.607, "eval_steps_per_second": 13.769, "step": 60500 }, { "epoch": 0.57, "learning_rate": 4.427563296484676e-05, "loss": 1.9541, "step": 61000 }, { "epoch": 0.57, "eval_loss": 1.7755423784255981, "eval_runtime": 226.9182, "eval_samples_per_second": 440.688, "eval_steps_per_second": 13.771, "step": 61000 }, { "epoch": 0.58, "learning_rate": 4.4228711923575014e-05, "loss": 1.9483, "step": 61500 }, { "epoch": 0.58, "eval_loss": 1.7737643718719482, "eval_runtime": 226.5336, "eval_samples_per_second": 441.436, "eval_steps_per_second": 13.795, "step": 61500 }, { "epoch": 0.58, "learning_rate": 4.418179088230327e-05, "loss": 1.9568, "step": 62000 }, { "epoch": 0.58, "eval_loss": 1.7714828252792358, "eval_runtime": 226.801, "eval_samples_per_second": 440.915, "eval_steps_per_second": 13.779, "step": 62000 }, { "epoch": 0.59, "learning_rate": 4.4134869841031514e-05, "loss": 1.9418, "step": 62500 }, { "epoch": 0.59, "eval_loss": 1.7680498361587524, "eval_runtime": 226.5426, "eval_samples_per_second": 441.418, "eval_steps_per_second": 13.794, "step": 62500 }, { "epoch": 0.59, "learning_rate": 4.408794879975977e-05, "loss": 1.946, "step": 63000 }, { "epoch": 0.59, "eval_loss": 1.756327509880066, "eval_runtime": 226.7739, "eval_samples_per_second": 440.968, "eval_steps_per_second": 13.78, "step": 63000 }, { "epoch": 0.6, "learning_rate": 4.404102775848802e-05, "loss": 1.9331, "step": 63500 }, { "epoch": 0.6, "eval_loss": 1.7607768774032593, "eval_runtime": 226.7955, "eval_samples_per_second": 440.926, "eval_steps_per_second": 13.779, "step": 63500 }, { "epoch": 0.6, "learning_rate": 4.3994106717216274e-05, "loss": 1.9287, "step": 64000 }, { "epoch": 0.6, "eval_loss": 1.7537603378295898, "eval_runtime": 226.7832, "eval_samples_per_second": 440.95, "eval_steps_per_second": 13.78, "step": 64000 }, { "epoch": 0.61, "learning_rate": 4.394718567594453e-05, "loss": 1.919, "step": 64500 }, { "epoch": 0.61, "eval_loss": 1.7554343938827515, "eval_runtime": 226.8184, "eval_samples_per_second": 440.881, "eval_steps_per_second": 13.778, "step": 64500 }, { "epoch": 0.61, "learning_rate": 4.390026463467278e-05, "loss": 1.9506, "step": 65000 }, { "epoch": 0.61, "eval_loss": 1.744460105895996, "eval_runtime": 226.7808, "eval_samples_per_second": 440.954, "eval_steps_per_second": 13.78, "step": 65000 }, { "epoch": 0.61, "learning_rate": 4.385334359340103e-05, "loss": 1.9358, "step": 65500 }, { "epoch": 0.61, "eval_loss": 1.7381893396377563, "eval_runtime": 226.843, "eval_samples_per_second": 440.833, "eval_steps_per_second": 13.776, "step": 65500 }, { "epoch": 0.62, "learning_rate": 4.380642255212928e-05, "loss": 1.9002, "step": 66000 }, { "epoch": 0.62, "eval_loss": 1.7369801998138428, "eval_runtime": 226.5756, "eval_samples_per_second": 441.354, "eval_steps_per_second": 13.792, "step": 66000 }, { "epoch": 0.62, "learning_rate": 4.3759501510857533e-05, "loss": 1.9122, "step": 66500 }, { "epoch": 0.62, "eval_loss": 1.7368305921554565, "eval_runtime": 226.767, "eval_samples_per_second": 440.981, "eval_steps_per_second": 13.781, "step": 66500 }, { "epoch": 0.63, "learning_rate": 4.371258046958579e-05, "loss": 1.9225, "step": 67000 }, { "epoch": 0.63, "eval_loss": 1.7325348854064941, "eval_runtime": 226.9377, "eval_samples_per_second": 440.65, "eval_steps_per_second": 13.77, "step": 67000 }, { "epoch": 0.63, "learning_rate": 4.366565942831404e-05, "loss": 1.9213, "step": 67500 }, { "epoch": 0.63, "eval_loss": 1.7244173288345337, "eval_runtime": 226.9908, "eval_samples_per_second": 440.547, "eval_steps_per_second": 13.767, "step": 67500 }, { "epoch": 0.64, "learning_rate": 4.3618738387042287e-05, "loss": 1.9084, "step": 68000 }, { "epoch": 0.64, "eval_loss": 1.725306510925293, "eval_runtime": 226.9071, "eval_samples_per_second": 440.709, "eval_steps_per_second": 13.772, "step": 68000 }, { "epoch": 0.64, "learning_rate": 4.357181734577054e-05, "loss": 1.9056, "step": 68500 }, { "epoch": 0.64, "eval_loss": 1.716600775718689, "eval_runtime": 226.8672, "eval_samples_per_second": 440.786, "eval_steps_per_second": 13.775, "step": 68500 }, { "epoch": 0.65, "learning_rate": 4.352489630449879e-05, "loss": 1.9175, "step": 69000 }, { "epoch": 0.65, "eval_loss": 1.7228271961212158, "eval_runtime": 226.6113, "eval_samples_per_second": 441.284, "eval_steps_per_second": 13.79, "step": 69000 }, { "epoch": 0.65, "learning_rate": 4.3477975263227046e-05, "loss": 1.8865, "step": 69500 }, { "epoch": 0.65, "eval_loss": 1.7209404706954956, "eval_runtime": 226.7014, "eval_samples_per_second": 441.109, "eval_steps_per_second": 13.785, "step": 69500 }, { "epoch": 0.66, "learning_rate": 4.34310542219553e-05, "loss": 1.8866, "step": 70000 }, { "epoch": 0.66, "eval_loss": 1.7054914236068726, "eval_runtime": 226.4302, "eval_samples_per_second": 441.637, "eval_steps_per_second": 13.801, "step": 70000 }, { "epoch": 0.66, "learning_rate": 4.338413318068355e-05, "loss": 1.8733, "step": 70500 }, { "epoch": 0.66, "eval_loss": 1.712384819984436, "eval_runtime": 226.2537, "eval_samples_per_second": 441.982, "eval_steps_per_second": 13.812, "step": 70500 }, { "epoch": 0.67, "learning_rate": 4.33372121394118e-05, "loss": 1.9032, "step": 71000 }, { "epoch": 0.67, "eval_loss": 1.6990084648132324, "eval_runtime": 225.9511, "eval_samples_per_second": 442.574, "eval_steps_per_second": 13.83, "step": 71000 }, { "epoch": 0.67, "learning_rate": 4.329029109814005e-05, "loss": 1.8713, "step": 71500 }, { "epoch": 0.67, "eval_loss": 1.6956583261489868, "eval_runtime": 226.2467, "eval_samples_per_second": 441.995, "eval_steps_per_second": 13.812, "step": 71500 }, { "epoch": 0.68, "learning_rate": 4.3243370056868306e-05, "loss": 1.8766, "step": 72000 }, { "epoch": 0.68, "eval_loss": 1.6951245069503784, "eval_runtime": 226.2025, "eval_samples_per_second": 442.082, "eval_steps_per_second": 13.815, "step": 72000 }, { "epoch": 0.68, "learning_rate": 4.319644901559656e-05, "loss": 1.9066, "step": 72500 }, { "epoch": 0.68, "eval_loss": 1.6982998847961426, "eval_runtime": 226.163, "eval_samples_per_second": 442.159, "eval_steps_per_second": 13.817, "step": 72500 }, { "epoch": 0.69, "learning_rate": 4.314952797432481e-05, "loss": 1.889, "step": 73000 }, { "epoch": 0.69, "eval_loss": 1.7046481370925903, "eval_runtime": 226.2165, "eval_samples_per_second": 442.054, "eval_steps_per_second": 13.814, "step": 73000 }, { "epoch": 0.69, "learning_rate": 4.310260693305306e-05, "loss": 1.8583, "step": 73500 }, { "epoch": 0.69, "eval_loss": 1.6921665668487549, "eval_runtime": 226.1668, "eval_samples_per_second": 442.152, "eval_steps_per_second": 13.817, "step": 73500 }, { "epoch": 0.69, "learning_rate": 4.305568589178131e-05, "loss": 1.846, "step": 74000 }, { "epoch": 0.69, "eval_loss": 1.6901285648345947, "eval_runtime": 225.9398, "eval_samples_per_second": 442.596, "eval_steps_per_second": 13.831, "step": 74000 }, { "epoch": 0.7, "learning_rate": 4.3008764850509566e-05, "loss": 1.8748, "step": 74500 }, { "epoch": 0.7, "eval_loss": 1.6798553466796875, "eval_runtime": 226.2378, "eval_samples_per_second": 442.013, "eval_steps_per_second": 13.813, "step": 74500 }, { "epoch": 0.7, "learning_rate": 4.296184380923782e-05, "loss": 1.8596, "step": 75000 }, { "epoch": 0.7, "eval_loss": 1.681208848953247, "eval_runtime": 226.5798, "eval_samples_per_second": 441.346, "eval_steps_per_second": 13.792, "step": 75000 }, { "epoch": 0.71, "learning_rate": 4.291492276796607e-05, "loss": 1.843, "step": 75500 }, { "epoch": 0.71, "eval_loss": 1.6777198314666748, "eval_runtime": 226.8856, "eval_samples_per_second": 440.751, "eval_steps_per_second": 13.773, "step": 75500 }, { "epoch": 0.71, "learning_rate": 4.2868001726694326e-05, "loss": 1.8458, "step": 76000 }, { "epoch": 0.71, "eval_loss": 1.6699376106262207, "eval_runtime": 226.7938, "eval_samples_per_second": 440.929, "eval_steps_per_second": 13.779, "step": 76000 }, { "epoch": 0.72, "learning_rate": 4.282108068542257e-05, "loss": 1.8544, "step": 76500 }, { "epoch": 0.72, "eval_loss": 1.6637232303619385, "eval_runtime": 226.7851, "eval_samples_per_second": 440.946, "eval_steps_per_second": 13.78, "step": 76500 }, { "epoch": 0.72, "learning_rate": 4.2774159644150825e-05, "loss": 1.8533, "step": 77000 }, { "epoch": 0.72, "eval_loss": 1.6666810512542725, "eval_runtime": 226.4743, "eval_samples_per_second": 441.551, "eval_steps_per_second": 13.798, "step": 77000 }, { "epoch": 0.73, "learning_rate": 4.272723860287908e-05, "loss": 1.8645, "step": 77500 }, { "epoch": 0.73, "eval_loss": 1.66965651512146, "eval_runtime": 226.5459, "eval_samples_per_second": 441.412, "eval_steps_per_second": 13.794, "step": 77500 }, { "epoch": 0.73, "learning_rate": 4.268031756160733e-05, "loss": 1.8535, "step": 78000 }, { "epoch": 0.73, "eval_loss": 1.665113091468811, "eval_runtime": 226.5387, "eval_samples_per_second": 441.426, "eval_steps_per_second": 13.795, "step": 78000 }, { "epoch": 0.74, "learning_rate": 4.2633396520335585e-05, "loss": 1.8425, "step": 78500 }, { "epoch": 0.74, "eval_loss": 1.655056118965149, "eval_runtime": 226.5932, "eval_samples_per_second": 441.319, "eval_steps_per_second": 13.791, "step": 78500 }, { "epoch": 0.74, "learning_rate": 4.258647547906383e-05, "loss": 1.8642, "step": 79000 }, { "epoch": 0.74, "eval_loss": 1.6554076671600342, "eval_runtime": 226.5747, "eval_samples_per_second": 441.356, "eval_steps_per_second": 13.792, "step": 79000 }, { "epoch": 0.75, "learning_rate": 4.2539554437792085e-05, "loss": 1.8259, "step": 79500 }, { "epoch": 0.75, "eval_loss": 1.6575286388397217, "eval_runtime": 226.3838, "eval_samples_per_second": 441.728, "eval_steps_per_second": 13.804, "step": 79500 }, { "epoch": 0.75, "learning_rate": 4.249263339652034e-05, "loss": 1.8199, "step": 80000 }, { "epoch": 0.75, "eval_loss": 1.6621437072753906, "eval_runtime": 226.5957, "eval_samples_per_second": 441.315, "eval_steps_per_second": 13.791, "step": 80000 }, { "epoch": 0.76, "learning_rate": 4.244571235524859e-05, "loss": 1.8384, "step": 80500 }, { "epoch": 0.76, "eval_loss": 1.6512243747711182, "eval_runtime": 226.3597, "eval_samples_per_second": 441.775, "eval_steps_per_second": 13.805, "step": 80500 }, { "epoch": 0.76, "learning_rate": 4.2398791313976845e-05, "loss": 1.8349, "step": 81000 }, { "epoch": 0.76, "eval_loss": 1.6566110849380493, "eval_runtime": 226.3468, "eval_samples_per_second": 441.8, "eval_steps_per_second": 13.806, "step": 81000 }, { "epoch": 0.76, "learning_rate": 4.23518702727051e-05, "loss": 1.8342, "step": 81500 }, { "epoch": 0.76, "eval_loss": 1.6478888988494873, "eval_runtime": 226.3371, "eval_samples_per_second": 441.819, "eval_steps_per_second": 13.807, "step": 81500 }, { "epoch": 0.77, "learning_rate": 4.2304949231433345e-05, "loss": 1.8508, "step": 82000 }, { "epoch": 0.77, "eval_loss": 1.6489554643630981, "eval_runtime": 226.3981, "eval_samples_per_second": 441.7, "eval_steps_per_second": 13.803, "step": 82000 }, { "epoch": 0.77, "learning_rate": 4.22580281901616e-05, "loss": 1.8135, "step": 82500 }, { "epoch": 0.77, "eval_loss": 1.6381429433822632, "eval_runtime": 226.3351, "eval_samples_per_second": 441.823, "eval_steps_per_second": 13.807, "step": 82500 }, { "epoch": 0.78, "learning_rate": 4.221110714888985e-05, "loss": 1.8332, "step": 83000 }, { "epoch": 0.78, "eval_loss": 1.6337202787399292, "eval_runtime": 226.073, "eval_samples_per_second": 442.335, "eval_steps_per_second": 13.823, "step": 83000 }, { "epoch": 0.78, "learning_rate": 4.2164186107618104e-05, "loss": 1.8315, "step": 83500 }, { "epoch": 0.78, "eval_loss": 1.637961506843567, "eval_runtime": 226.4701, "eval_samples_per_second": 441.559, "eval_steps_per_second": 13.799, "step": 83500 }, { "epoch": 0.79, "learning_rate": 4.211726506634636e-05, "loss": 1.8363, "step": 84000 }, { "epoch": 0.79, "eval_loss": 1.6368101835250854, "eval_runtime": 226.1996, "eval_samples_per_second": 442.087, "eval_steps_per_second": 13.815, "step": 84000 }, { "epoch": 0.79, "learning_rate": 4.2070344025074604e-05, "loss": 1.8248, "step": 84500 }, { "epoch": 0.79, "eval_loss": 1.626143455505371, "eval_runtime": 226.4446, "eval_samples_per_second": 441.609, "eval_steps_per_second": 13.8, "step": 84500 }, { "epoch": 0.8, "learning_rate": 4.202342298380286e-05, "loss": 1.8045, "step": 85000 }, { "epoch": 0.8, "eval_loss": 1.6303765773773193, "eval_runtime": 226.3791, "eval_samples_per_second": 441.737, "eval_steps_per_second": 13.804, "step": 85000 }, { "epoch": 0.8, "learning_rate": 4.197650194253111e-05, "loss": 1.8172, "step": 85500 }, { "epoch": 0.8, "eval_loss": 1.6250081062316895, "eval_runtime": 226.1683, "eval_samples_per_second": 442.149, "eval_steps_per_second": 13.817, "step": 85500 }, { "epoch": 0.81, "learning_rate": 4.1929580901259364e-05, "loss": 1.8161, "step": 86000 }, { "epoch": 0.81, "eval_loss": 1.615513801574707, "eval_runtime": 226.0153, "eval_samples_per_second": 442.448, "eval_steps_per_second": 13.826, "step": 86000 }, { "epoch": 0.81, "learning_rate": 4.188265985998762e-05, "loss": 1.7852, "step": 86500 }, { "epoch": 0.81, "eval_loss": 1.6129599809646606, "eval_runtime": 225.9925, "eval_samples_per_second": 442.493, "eval_steps_per_second": 13.828, "step": 86500 }, { "epoch": 0.82, "learning_rate": 4.183573881871587e-05, "loss": 1.7935, "step": 87000 }, { "epoch": 0.82, "eval_loss": 1.6187821626663208, "eval_runtime": 226.2683, "eval_samples_per_second": 441.953, "eval_steps_per_second": 13.811, "step": 87000 }, { "epoch": 0.82, "learning_rate": 4.178881777744412e-05, "loss": 1.791, "step": 87500 }, { "epoch": 0.82, "eval_loss": 1.6135245561599731, "eval_runtime": 226.3101, "eval_samples_per_second": 441.872, "eval_steps_per_second": 13.808, "step": 87500 }, { "epoch": 0.83, "learning_rate": 4.174189673617237e-05, "loss": 1.8019, "step": 88000 }, { "epoch": 0.83, "eval_loss": 1.6148468255996704, "eval_runtime": 225.9824, "eval_samples_per_second": 442.512, "eval_steps_per_second": 13.829, "step": 88000 }, { "epoch": 0.83, "learning_rate": 4.1694975694900624e-05, "loss": 1.7808, "step": 88500 }, { "epoch": 0.83, "eval_loss": 1.6106659173965454, "eval_runtime": 226.0203, "eval_samples_per_second": 442.438, "eval_steps_per_second": 13.826, "step": 88500 }, { "epoch": 0.84, "learning_rate": 4.164805465362888e-05, "loss": 1.7844, "step": 89000 }, { "epoch": 0.84, "eval_loss": 1.6136785745620728, "eval_runtime": 226.2859, "eval_samples_per_second": 441.919, "eval_steps_per_second": 13.81, "step": 89000 }, { "epoch": 0.84, "learning_rate": 4.160113361235713e-05, "loss": 1.774, "step": 89500 }, { "epoch": 0.84, "eval_loss": 1.6069365739822388, "eval_runtime": 226.2313, "eval_samples_per_second": 442.025, "eval_steps_per_second": 13.813, "step": 89500 }, { "epoch": 0.84, "learning_rate": 4.155421257108538e-05, "loss": 1.7877, "step": 90000 }, { "epoch": 0.84, "eval_loss": 1.6087855100631714, "eval_runtime": 226.3039, "eval_samples_per_second": 441.884, "eval_steps_per_second": 13.809, "step": 90000 }, { "epoch": 0.85, "learning_rate": 4.150729152981363e-05, "loss": 1.7757, "step": 90500 }, { "epoch": 0.85, "eval_loss": 1.590334177017212, "eval_runtime": 226.2681, "eval_samples_per_second": 441.954, "eval_steps_per_second": 13.811, "step": 90500 }, { "epoch": 0.85, "learning_rate": 4.1460370488541883e-05, "loss": 1.7731, "step": 91000 }, { "epoch": 0.85, "eval_loss": 1.5941905975341797, "eval_runtime": 226.0068, "eval_samples_per_second": 442.465, "eval_steps_per_second": 13.827, "step": 91000 }, { "epoch": 0.86, "learning_rate": 4.141344944727014e-05, "loss": 1.7639, "step": 91500 }, { "epoch": 0.86, "eval_loss": 1.6017308235168457, "eval_runtime": 226.0481, "eval_samples_per_second": 442.384, "eval_steps_per_second": 13.824, "step": 91500 }, { "epoch": 0.86, "learning_rate": 4.136652840599839e-05, "loss": 1.7889, "step": 92000 }, { "epoch": 0.86, "eval_loss": 1.592837929725647, "eval_runtime": 226.2492, "eval_samples_per_second": 441.991, "eval_steps_per_second": 13.812, "step": 92000 }, { "epoch": 0.87, "learning_rate": 4.131960736472664e-05, "loss": 1.7788, "step": 92500 }, { "epoch": 0.87, "eval_loss": 1.5879290103912354, "eval_runtime": 226.1946, "eval_samples_per_second": 442.097, "eval_steps_per_second": 13.816, "step": 92500 }, { "epoch": 0.87, "learning_rate": 4.127268632345489e-05, "loss": 1.7976, "step": 93000 }, { "epoch": 0.87, "eval_loss": 1.5884945392608643, "eval_runtime": 226.1587, "eval_samples_per_second": 442.167, "eval_steps_per_second": 13.818, "step": 93000 }, { "epoch": 0.88, "learning_rate": 4.122576528218314e-05, "loss": 1.7698, "step": 93500 }, { "epoch": 0.88, "eval_loss": 1.5887948274612427, "eval_runtime": 226.2905, "eval_samples_per_second": 441.91, "eval_steps_per_second": 13.81, "step": 93500 }, { "epoch": 0.88, "learning_rate": 4.1178844240911396e-05, "loss": 1.7684, "step": 94000 }, { "epoch": 0.88, "eval_loss": 1.576420783996582, "eval_runtime": 226.1872, "eval_samples_per_second": 442.112, "eval_steps_per_second": 13.816, "step": 94000 }, { "epoch": 0.89, "learning_rate": 4.113192319963965e-05, "loss": 1.7679, "step": 94500 }, { "epoch": 0.89, "eval_loss": 1.5760667324066162, "eval_runtime": 226.1526, "eval_samples_per_second": 442.179, "eval_steps_per_second": 13.818, "step": 94500 }, { "epoch": 0.89, "learning_rate": 4.10850021583679e-05, "loss": 1.7532, "step": 95000 }, { "epoch": 0.89, "eval_loss": 1.576305627822876, "eval_runtime": 226.1826, "eval_samples_per_second": 442.121, "eval_steps_per_second": 13.816, "step": 95000 }, { "epoch": 0.9, "learning_rate": 4.103808111709615e-05, "loss": 1.765, "step": 95500 }, { "epoch": 0.9, "eval_loss": 1.580979347229004, "eval_runtime": 226.2119, "eval_samples_per_second": 442.063, "eval_steps_per_second": 13.814, "step": 95500 }, { "epoch": 0.9, "learning_rate": 4.09911600758244e-05, "loss": 1.7429, "step": 96000 }, { "epoch": 0.9, "eval_loss": 1.5754783153533936, "eval_runtime": 226.2117, "eval_samples_per_second": 442.064, "eval_steps_per_second": 13.814, "step": 96000 }, { "epoch": 0.91, "learning_rate": 4.0944239034552656e-05, "loss": 1.7522, "step": 96500 }, { "epoch": 0.91, "eval_loss": 1.5753768682479858, "eval_runtime": 225.941, "eval_samples_per_second": 442.593, "eval_steps_per_second": 13.831, "step": 96500 }, { "epoch": 0.91, "learning_rate": 4.089731799328091e-05, "loss": 1.7401, "step": 97000 }, { "epoch": 0.91, "eval_loss": 1.5701993703842163, "eval_runtime": 225.8905, "eval_samples_per_second": 442.692, "eval_steps_per_second": 13.834, "step": 97000 }, { "epoch": 0.91, "learning_rate": 4.085039695200916e-05, "loss": 1.7533, "step": 97500 }, { "epoch": 0.91, "eval_loss": 1.5690381526947021, "eval_runtime": 226.1613, "eval_samples_per_second": 442.162, "eval_steps_per_second": 13.818, "step": 97500 }, { "epoch": 0.92, "learning_rate": 4.0803475910737416e-05, "loss": 1.7558, "step": 98000 }, { "epoch": 0.92, "eval_loss": 1.564376711845398, "eval_runtime": 226.2821, "eval_samples_per_second": 441.926, "eval_steps_per_second": 13.81, "step": 98000 }, { "epoch": 0.92, "learning_rate": 4.075655486946566e-05, "loss": 1.7256, "step": 98500 }, { "epoch": 0.92, "eval_loss": 1.562330722808838, "eval_runtime": 226.2172, "eval_samples_per_second": 442.053, "eval_steps_per_second": 13.814, "step": 98500 }, { "epoch": 0.93, "learning_rate": 4.0709633828193916e-05, "loss": 1.7487, "step": 99000 }, { "epoch": 0.93, "eval_loss": 1.5604215860366821, "eval_runtime": 225.9865, "eval_samples_per_second": 442.504, "eval_steps_per_second": 13.828, "step": 99000 }, { "epoch": 0.93, "learning_rate": 4.066271278692217e-05, "loss": 1.748, "step": 99500 }, { "epoch": 0.93, "eval_loss": 1.55665922164917, "eval_runtime": 226.1873, "eval_samples_per_second": 442.111, "eval_steps_per_second": 13.816, "step": 99500 }, { "epoch": 0.94, "learning_rate": 4.061579174565042e-05, "loss": 1.7337, "step": 100000 }, { "epoch": 0.94, "eval_loss": 1.5678207874298096, "eval_runtime": 226.1956, "eval_samples_per_second": 442.095, "eval_steps_per_second": 13.815, "step": 100000 }, { "epoch": 0.94, "learning_rate": 4.0568870704378675e-05, "loss": 1.7551, "step": 100500 }, { "epoch": 0.94, "eval_loss": 1.5610854625701904, "eval_runtime": 226.1994, "eval_samples_per_second": 442.088, "eval_steps_per_second": 13.815, "step": 100500 }, { "epoch": 0.95, "learning_rate": 4.052194966310692e-05, "loss": 1.7394, "step": 101000 }, { "epoch": 0.95, "eval_loss": 1.5528591871261597, "eval_runtime": 226.4258, "eval_samples_per_second": 441.646, "eval_steps_per_second": 13.801, "step": 101000 }, { "epoch": 0.95, "learning_rate": 4.0475028621835175e-05, "loss": 1.7447, "step": 101500 }, { "epoch": 0.95, "eval_loss": 1.5571315288543701, "eval_runtime": 226.3464, "eval_samples_per_second": 441.801, "eval_steps_per_second": 13.806, "step": 101500 }, { "epoch": 0.96, "learning_rate": 4.042810758056343e-05, "loss": 1.7411, "step": 102000 }, { "epoch": 0.96, "eval_loss": 1.5496081113815308, "eval_runtime": 226.2985, "eval_samples_per_second": 441.894, "eval_steps_per_second": 13.809, "step": 102000 }, { "epoch": 0.96, "learning_rate": 4.038118653929168e-05, "loss": 1.7496, "step": 102500 }, { "epoch": 0.96, "eval_loss": 1.551464319229126, "eval_runtime": 226.3296, "eval_samples_per_second": 441.834, "eval_steps_per_second": 13.807, "step": 102500 }, { "epoch": 0.97, "learning_rate": 4.0334265498019935e-05, "loss": 1.7381, "step": 103000 }, { "epoch": 0.97, "eval_loss": 1.5396504402160645, "eval_runtime": 225.928, "eval_samples_per_second": 442.619, "eval_steps_per_second": 13.832, "step": 103000 }, { "epoch": 0.97, "learning_rate": 4.028734445674819e-05, "loss": 1.7513, "step": 103500 }, { "epoch": 0.97, "eval_loss": 1.5501888990402222, "eval_runtime": 226.1976, "eval_samples_per_second": 442.091, "eval_steps_per_second": 13.815, "step": 103500 }, { "epoch": 0.98, "learning_rate": 4.0240423415476435e-05, "loss": 1.7126, "step": 104000 }, { "epoch": 0.98, "eval_loss": 1.5375173091888428, "eval_runtime": 226.1628, "eval_samples_per_second": 442.159, "eval_steps_per_second": 13.817, "step": 104000 }, { "epoch": 0.98, "learning_rate": 4.019350237420469e-05, "loss": 1.7644, "step": 104500 }, { "epoch": 0.98, "eval_loss": 1.5411220788955688, "eval_runtime": 226.1986, "eval_samples_per_second": 442.089, "eval_steps_per_second": 13.815, "step": 104500 }, { "epoch": 0.99, "learning_rate": 4.014658133293294e-05, "loss": 1.7142, "step": 105000 }, { "epoch": 0.99, "eval_loss": 1.5340408086776733, "eval_runtime": 226.2255, "eval_samples_per_second": 442.037, "eval_steps_per_second": 13.814, "step": 105000 }, { "epoch": 0.99, "learning_rate": 4.0099660291661195e-05, "loss": 1.7053, "step": 105500 }, { "epoch": 0.99, "eval_loss": 1.5434354543685913, "eval_runtime": 226.2654, "eval_samples_per_second": 441.959, "eval_steps_per_second": 13.811, "step": 105500 }, { "epoch": 0.99, "learning_rate": 4.005273925038945e-05, "loss": 1.7352, "step": 106000 }, { "epoch": 0.99, "eval_loss": 1.5321134328842163, "eval_runtime": 226.0566, "eval_samples_per_second": 442.367, "eval_steps_per_second": 13.824, "step": 106000 }, { "epoch": 1.0, "learning_rate": 4.00058182091177e-05, "loss": 1.7152, "step": 106500 }, { "epoch": 1.0, "eval_loss": 1.5397337675094604, "eval_runtime": 226.363, "eval_samples_per_second": 441.768, "eval_steps_per_second": 13.805, "step": 106500 }, { "epoch": 1.0, "learning_rate": 3.995889716784595e-05, "loss": 1.6927, "step": 107000 }, { "epoch": 1.0, "eval_loss": 1.542881727218628, "eval_runtime": 226.3007, "eval_samples_per_second": 441.89, "eval_steps_per_second": 13.809, "step": 107000 }, { "epoch": 1.01, "learning_rate": 3.99119761265742e-05, "loss": 1.7013, "step": 107500 }, { "epoch": 1.01, "eval_loss": 1.528303861618042, "eval_runtime": 226.2289, "eval_samples_per_second": 442.03, "eval_steps_per_second": 13.813, "step": 107500 }, { "epoch": 1.01, "learning_rate": 3.9865055085302454e-05, "loss": 1.7318, "step": 108000 }, { "epoch": 1.01, "eval_loss": 1.5268659591674805, "eval_runtime": 226.1768, "eval_samples_per_second": 442.132, "eval_steps_per_second": 13.817, "step": 108000 }, { "epoch": 1.02, "learning_rate": 3.981813404403071e-05, "loss": 1.6903, "step": 108500 }, { "epoch": 1.02, "eval_loss": 1.528219223022461, "eval_runtime": 226.4877, "eval_samples_per_second": 441.525, "eval_steps_per_second": 13.798, "step": 108500 }, { "epoch": 1.02, "learning_rate": 3.977121300275896e-05, "loss": 1.7105, "step": 109000 }, { "epoch": 1.02, "eval_loss": 1.5189323425292969, "eval_runtime": 226.1979, "eval_samples_per_second": 442.091, "eval_steps_per_second": 13.815, "step": 109000 }, { "epoch": 1.03, "learning_rate": 3.972429196148721e-05, "loss": 1.6979, "step": 109500 }, { "epoch": 1.03, "eval_loss": 1.5255582332611084, "eval_runtime": 226.4286, "eval_samples_per_second": 441.64, "eval_steps_per_second": 13.801, "step": 109500 }, { "epoch": 1.03, "learning_rate": 3.967737092021546e-05, "loss": 1.6922, "step": 110000 }, { "epoch": 1.03, "eval_loss": 1.5171970129013062, "eval_runtime": 226.4396, "eval_samples_per_second": 441.619, "eval_steps_per_second": 13.801, "step": 110000 }, { "epoch": 1.04, "learning_rate": 3.9630449878943714e-05, "loss": 1.7161, "step": 110500 }, { "epoch": 1.04, "eval_loss": 1.517843246459961, "eval_runtime": 226.4686, "eval_samples_per_second": 441.562, "eval_steps_per_second": 13.799, "step": 110500 }, { "epoch": 1.04, "learning_rate": 3.958352883767197e-05, "loss": 1.719, "step": 111000 }, { "epoch": 1.04, "eval_loss": 1.5142602920532227, "eval_runtime": 226.7344, "eval_samples_per_second": 441.045, "eval_steps_per_second": 13.783, "step": 111000 }, { "epoch": 1.05, "learning_rate": 3.953660779640022e-05, "loss": 1.6857, "step": 111500 }, { "epoch": 1.05, "eval_loss": 1.5159624814987183, "eval_runtime": 226.7078, "eval_samples_per_second": 441.096, "eval_steps_per_second": 13.784, "step": 111500 }, { "epoch": 1.05, "learning_rate": 3.9489686755128474e-05, "loss": 1.6983, "step": 112000 }, { "epoch": 1.05, "eval_loss": 1.510577917098999, "eval_runtime": 226.2373, "eval_samples_per_second": 442.014, "eval_steps_per_second": 13.813, "step": 112000 }, { "epoch": 1.06, "learning_rate": 3.944276571385672e-05, "loss": 1.6838, "step": 112500 }, { "epoch": 1.06, "eval_loss": 1.5174448490142822, "eval_runtime": 226.647, "eval_samples_per_second": 441.215, "eval_steps_per_second": 13.788, "step": 112500 }, { "epoch": 1.06, "learning_rate": 3.9395844672584974e-05, "loss": 1.6936, "step": 113000 }, { "epoch": 1.06, "eval_loss": 1.5134094953536987, "eval_runtime": 226.7825, "eval_samples_per_second": 440.951, "eval_steps_per_second": 13.78, "step": 113000 }, { "epoch": 1.07, "learning_rate": 3.934892363131323e-05, "loss": 1.675, "step": 113500 }, { "epoch": 1.07, "eval_loss": 1.5090696811676025, "eval_runtime": 226.8241, "eval_samples_per_second": 440.87, "eval_steps_per_second": 13.777, "step": 113500 }, { "epoch": 1.07, "learning_rate": 3.930200259004148e-05, "loss": 1.6706, "step": 114000 }, { "epoch": 1.07, "eval_loss": 1.5159416198730469, "eval_runtime": 226.8746, "eval_samples_per_second": 440.772, "eval_steps_per_second": 13.774, "step": 114000 }, { "epoch": 1.07, "learning_rate": 3.9255081548769734e-05, "loss": 1.6763, "step": 114500 }, { "epoch": 1.07, "eval_loss": 1.4984365701675415, "eval_runtime": 226.8627, "eval_samples_per_second": 440.795, "eval_steps_per_second": 13.775, "step": 114500 }, { "epoch": 1.08, "learning_rate": 3.920816050749798e-05, "loss": 1.6888, "step": 115000 }, { "epoch": 1.08, "eval_loss": 1.504223108291626, "eval_runtime": 228.3239, "eval_samples_per_second": 437.974, "eval_steps_per_second": 13.687, "step": 115000 }, { "epoch": 1.08, "learning_rate": 3.916123946622623e-05, "loss": 1.6627, "step": 115500 }, { "epoch": 1.08, "eval_loss": 1.5011541843414307, "eval_runtime": 226.9808, "eval_samples_per_second": 440.566, "eval_steps_per_second": 13.768, "step": 115500 }, { "epoch": 1.09, "learning_rate": 3.911431842495449e-05, "loss": 1.6739, "step": 116000 }, { "epoch": 1.09, "eval_loss": 1.5042829513549805, "eval_runtime": 227.232, "eval_samples_per_second": 440.079, "eval_steps_per_second": 13.752, "step": 116000 }, { "epoch": 1.09, "learning_rate": 3.906739738368274e-05, "loss": 1.6465, "step": 116500 }, { "epoch": 1.09, "eval_loss": 1.496393084526062, "eval_runtime": 227.0422, "eval_samples_per_second": 440.447, "eval_steps_per_second": 13.764, "step": 116500 }, { "epoch": 1.1, "learning_rate": 3.902047634241099e-05, "loss": 1.681, "step": 117000 }, { "epoch": 1.1, "eval_loss": 1.501774549484253, "eval_runtime": 226.8713, "eval_samples_per_second": 440.779, "eval_steps_per_second": 13.774, "step": 117000 }, { "epoch": 1.1, "learning_rate": 3.8973555301139247e-05, "loss": 1.6674, "step": 117500 }, { "epoch": 1.1, "eval_loss": 1.492223858833313, "eval_runtime": 227.2955, "eval_samples_per_second": 439.956, "eval_steps_per_second": 13.749, "step": 117500 }, { "epoch": 1.11, "learning_rate": 3.892663425986749e-05, "loss": 1.6808, "step": 118000 }, { "epoch": 1.11, "eval_loss": 1.5016900300979614, "eval_runtime": 227.0887, "eval_samples_per_second": 440.356, "eval_steps_per_second": 13.761, "step": 118000 }, { "epoch": 1.11, "learning_rate": 3.8879713218595746e-05, "loss": 1.6687, "step": 118500 }, { "epoch": 1.11, "eval_loss": 1.4928853511810303, "eval_runtime": 235.0541, "eval_samples_per_second": 425.434, "eval_steps_per_second": 13.295, "step": 118500 }, { "epoch": 1.12, "learning_rate": 3.8832792177324e-05, "loss": 1.6711, "step": 119000 }, { "epoch": 1.12, "eval_loss": 1.4951835870742798, "eval_runtime": 231.8391, "eval_samples_per_second": 431.334, "eval_steps_per_second": 13.479, "step": 119000 }, { "epoch": 1.12, "learning_rate": 3.878587113605225e-05, "loss": 1.6629, "step": 119500 }, { "epoch": 1.12, "eval_loss": 1.49068284034729, "eval_runtime": 232.7614, "eval_samples_per_second": 429.624, "eval_steps_per_second": 13.426, "step": 119500 }, { "epoch": 1.13, "learning_rate": 3.8738950094780506e-05, "loss": 1.6762, "step": 120000 }, { "epoch": 1.13, "eval_loss": 1.497752070426941, "eval_runtime": 226.4934, "eval_samples_per_second": 441.514, "eval_steps_per_second": 13.797, "step": 120000 }, { "epoch": 1.13, "learning_rate": 3.869202905350875e-05, "loss": 1.6806, "step": 120500 }, { "epoch": 1.13, "eval_loss": 1.5004570484161377, "eval_runtime": 231.6639, "eval_samples_per_second": 431.66, "eval_steps_per_second": 13.489, "step": 120500 }, { "epoch": 1.14, "learning_rate": 3.8645108012237006e-05, "loss": 1.6322, "step": 121000 }, { "epoch": 1.14, "eval_loss": 1.491471767425537, "eval_runtime": 223.6465, "eval_samples_per_second": 447.134, "eval_steps_per_second": 13.973, "step": 121000 }, { "epoch": 1.14, "learning_rate": 3.859818697096526e-05, "loss": 1.6823, "step": 121500 }, { "epoch": 1.14, "eval_loss": 1.4876571893692017, "eval_runtime": 223.7062, "eval_samples_per_second": 447.015, "eval_steps_per_second": 13.969, "step": 121500 }, { "epoch": 1.14, "learning_rate": 3.855126592969351e-05, "loss": 1.6659, "step": 122000 }, { "epoch": 1.14, "eval_loss": 1.4812325239181519, "eval_runtime": 223.6584, "eval_samples_per_second": 447.11, "eval_steps_per_second": 13.972, "step": 122000 }, { "epoch": 1.15, "learning_rate": 3.8504344888421766e-05, "loss": 1.6801, "step": 122500 }, { "epoch": 1.15, "eval_loss": 1.4771298170089722, "eval_runtime": 223.7401, "eval_samples_per_second": 446.947, "eval_steps_per_second": 13.967, "step": 122500 }, { "epoch": 1.15, "learning_rate": 3.845742384715002e-05, "loss": 1.6685, "step": 123000 }, { "epoch": 1.15, "eval_loss": 1.473673939704895, "eval_runtime": 223.7681, "eval_samples_per_second": 446.891, "eval_steps_per_second": 13.965, "step": 123000 }, { "epoch": 1.16, "learning_rate": 3.8410502805878266e-05, "loss": 1.6686, "step": 123500 }, { "epoch": 1.16, "eval_loss": 1.4780343770980835, "eval_runtime": 223.8692, "eval_samples_per_second": 446.689, "eval_steps_per_second": 13.959, "step": 123500 }, { "epoch": 1.16, "learning_rate": 3.836358176460652e-05, "loss": 1.6603, "step": 124000 }, { "epoch": 1.16, "eval_loss": 1.478870153427124, "eval_runtime": 237.4256, "eval_samples_per_second": 421.185, "eval_steps_per_second": 13.162, "step": 124000 }, { "epoch": 1.17, "learning_rate": 3.831666072333477e-05, "loss": 1.6402, "step": 124500 }, { "epoch": 1.17, "eval_loss": 1.4783562421798706, "eval_runtime": 236.4563, "eval_samples_per_second": 422.911, "eval_steps_per_second": 13.216, "step": 124500 }, { "epoch": 1.17, "learning_rate": 3.8269739682063025e-05, "loss": 1.6585, "step": 125000 }, { "epoch": 1.17, "eval_loss": 1.465975046157837, "eval_runtime": 236.3659, "eval_samples_per_second": 423.073, "eval_steps_per_second": 13.221, "step": 125000 }, { "epoch": 1.18, "learning_rate": 3.822281864079128e-05, "loss": 1.6569, "step": 125500 }, { "epoch": 1.18, "eval_loss": 1.4764187335968018, "eval_runtime": 224.8017, "eval_samples_per_second": 444.837, "eval_steps_per_second": 13.901, "step": 125500 }, { "epoch": 1.18, "learning_rate": 3.8175897599519525e-05, "loss": 1.6557, "step": 126000 }, { "epoch": 1.18, "eval_loss": 1.4717729091644287, "eval_runtime": 221.9623, "eval_samples_per_second": 450.527, "eval_steps_per_second": 14.079, "step": 126000 }, { "epoch": 1.19, "learning_rate": 3.812897655824778e-05, "loss": 1.6409, "step": 126500 }, { "epoch": 1.19, "eval_loss": 1.4730979204177856, "eval_runtime": 221.9654, "eval_samples_per_second": 450.521, "eval_steps_per_second": 14.079, "step": 126500 }, { "epoch": 1.19, "learning_rate": 3.808205551697603e-05, "loss": 1.6487, "step": 127000 }, { "epoch": 1.19, "eval_loss": 1.4701308012008667, "eval_runtime": 222.0375, "eval_samples_per_second": 450.374, "eval_steps_per_second": 14.074, "step": 127000 }, { "epoch": 1.2, "learning_rate": 3.8035134475704285e-05, "loss": 1.645, "step": 127500 }, { "epoch": 1.2, "eval_loss": 1.4616634845733643, "eval_runtime": 222.0725, "eval_samples_per_second": 450.303, "eval_steps_per_second": 14.072, "step": 127500 }, { "epoch": 1.2, "learning_rate": 3.798821343443254e-05, "loss": 1.6402, "step": 128000 }, { "epoch": 1.2, "eval_loss": 1.4659932851791382, "eval_runtime": 222.1612, "eval_samples_per_second": 450.124, "eval_steps_per_second": 14.066, "step": 128000 }, { "epoch": 1.21, "learning_rate": 3.794129239316079e-05, "loss": 1.6373, "step": 128500 }, { "epoch": 1.21, "eval_loss": 1.4619441032409668, "eval_runtime": 222.2305, "eval_samples_per_second": 449.983, "eval_steps_per_second": 14.062, "step": 128500 }, { "epoch": 1.21, "learning_rate": 3.789437135188904e-05, "loss": 1.6473, "step": 129000 }, { "epoch": 1.21, "eval_loss": 1.4622013568878174, "eval_runtime": 222.2288, "eval_samples_per_second": 449.987, "eval_steps_per_second": 14.062, "step": 129000 }, { "epoch": 1.22, "learning_rate": 3.784745031061729e-05, "loss": 1.6421, "step": 129500 }, { "epoch": 1.22, "eval_loss": 1.4600987434387207, "eval_runtime": 222.2845, "eval_samples_per_second": 449.874, "eval_steps_per_second": 14.059, "step": 129500 }, { "epoch": 1.22, "learning_rate": 3.7800529269345545e-05, "loss": 1.6401, "step": 130000 }, { "epoch": 1.22, "eval_loss": 1.4546868801116943, "eval_runtime": 229.4754, "eval_samples_per_second": 435.777, "eval_steps_per_second": 13.618, "step": 130000 }, { "epoch": 1.22, "learning_rate": 3.77536082280738e-05, "loss": 1.6446, "step": 130500 }, { "epoch": 1.22, "eval_loss": 1.4492428302764893, "eval_runtime": 228.4821, "eval_samples_per_second": 437.671, "eval_steps_per_second": 13.677, "step": 130500 }, { "epoch": 1.23, "learning_rate": 3.770668718680205e-05, "loss": 1.6274, "step": 131000 }, { "epoch": 1.23, "eval_loss": 1.4530912637710571, "eval_runtime": 228.7476, "eval_samples_per_second": 437.163, "eval_steps_per_second": 13.661, "step": 131000 }, { "epoch": 1.23, "learning_rate": 3.76597661455303e-05, "loss": 1.6548, "step": 131500 }, { "epoch": 1.23, "eval_loss": 1.453102946281433, "eval_runtime": 232.9785, "eval_samples_per_second": 429.224, "eval_steps_per_second": 13.413, "step": 131500 }, { "epoch": 1.24, "learning_rate": 3.761284510425855e-05, "loss": 1.6128, "step": 132000 }, { "epoch": 1.24, "eval_loss": 1.445427656173706, "eval_runtime": 232.8681, "eval_samples_per_second": 429.428, "eval_steps_per_second": 13.42, "step": 132000 }, { "epoch": 1.24, "learning_rate": 3.7565924062986804e-05, "loss": 1.6217, "step": 132500 }, { "epoch": 1.24, "eval_loss": 1.4480894804000854, "eval_runtime": 229.5409, "eval_samples_per_second": 435.652, "eval_steps_per_second": 13.614, "step": 132500 }, { "epoch": 1.25, "learning_rate": 3.751900302171506e-05, "loss": 1.6292, "step": 133000 }, { "epoch": 1.25, "eval_loss": 1.4402817487716675, "eval_runtime": 223.3255, "eval_samples_per_second": 447.777, "eval_steps_per_second": 13.993, "step": 133000 }, { "epoch": 1.25, "learning_rate": 3.747208198044331e-05, "loss": 1.6262, "step": 133500 }, { "epoch": 1.25, "eval_loss": 1.4454220533370972, "eval_runtime": 223.1317, "eval_samples_per_second": 448.166, "eval_steps_per_second": 14.005, "step": 133500 }, { "epoch": 1.26, "learning_rate": 3.7425160939171564e-05, "loss": 1.6355, "step": 134000 }, { "epoch": 1.26, "eval_loss": 1.4490951299667358, "eval_runtime": 222.8039, "eval_samples_per_second": 448.825, "eval_steps_per_second": 14.026, "step": 134000 }, { "epoch": 1.26, "learning_rate": 3.737823989789981e-05, "loss": 1.6361, "step": 134500 }, { "epoch": 1.26, "eval_loss": 1.446262001991272, "eval_runtime": 222.7683, "eval_samples_per_second": 448.897, "eval_steps_per_second": 14.028, "step": 134500 }, { "epoch": 1.27, "learning_rate": 3.7331318856628064e-05, "loss": 1.6147, "step": 135000 }, { "epoch": 1.27, "eval_loss": 1.4396119117736816, "eval_runtime": 222.7955, "eval_samples_per_second": 448.842, "eval_steps_per_second": 14.026, "step": 135000 }, { "epoch": 1.27, "learning_rate": 3.728439781535632e-05, "loss": 1.6442, "step": 135500 }, { "epoch": 1.27, "eval_loss": 1.4377689361572266, "eval_runtime": 222.8075, "eval_samples_per_second": 448.818, "eval_steps_per_second": 14.026, "step": 135500 }, { "epoch": 1.28, "learning_rate": 3.723747677408457e-05, "loss": 1.6069, "step": 136000 }, { "epoch": 1.28, "eval_loss": 1.436558723449707, "eval_runtime": 222.7834, "eval_samples_per_second": 448.867, "eval_steps_per_second": 14.027, "step": 136000 }, { "epoch": 1.28, "learning_rate": 3.7190555732812824e-05, "loss": 1.6183, "step": 136500 }, { "epoch": 1.28, "eval_loss": 1.4427673816680908, "eval_runtime": 222.8527, "eval_samples_per_second": 448.727, "eval_steps_per_second": 14.023, "step": 136500 }, { "epoch": 1.29, "learning_rate": 3.714363469154107e-05, "loss": 1.6183, "step": 137000 }, { "epoch": 1.29, "eval_loss": 1.4434157609939575, "eval_runtime": 222.9556, "eval_samples_per_second": 448.52, "eval_steps_per_second": 14.016, "step": 137000 }, { "epoch": 1.29, "learning_rate": 3.7096713650269324e-05, "loss": 1.6068, "step": 137500 }, { "epoch": 1.29, "eval_loss": 1.4364444017410278, "eval_runtime": 222.8985, "eval_samples_per_second": 448.635, "eval_steps_per_second": 14.02, "step": 137500 }, { "epoch": 1.3, "learning_rate": 3.704979260899758e-05, "loss": 1.6363, "step": 138000 }, { "epoch": 1.3, "eval_loss": 1.4355006217956543, "eval_runtime": 222.8369, "eval_samples_per_second": 448.759, "eval_steps_per_second": 14.024, "step": 138000 }, { "epoch": 1.3, "learning_rate": 3.700287156772583e-05, "loss": 1.6119, "step": 138500 }, { "epoch": 1.3, "eval_loss": 1.4310338497161865, "eval_runtime": 222.7256, "eval_samples_per_second": 448.983, "eval_steps_per_second": 14.031, "step": 138500 }, { "epoch": 1.3, "learning_rate": 3.6955950526454084e-05, "loss": 1.6483, "step": 139000 }, { "epoch": 1.3, "eval_loss": 1.4362512826919556, "eval_runtime": 222.6329, "eval_samples_per_second": 449.17, "eval_steps_per_second": 14.037, "step": 139000 }, { "epoch": 1.31, "learning_rate": 3.690902948518234e-05, "loss": 1.612, "step": 139500 }, { "epoch": 1.31, "eval_loss": 1.4332929849624634, "eval_runtime": 222.5859, "eval_samples_per_second": 449.265, "eval_steps_per_second": 14.04, "step": 139500 }, { "epoch": 1.31, "learning_rate": 3.686210844391059e-05, "loss": 1.61, "step": 140000 }, { "epoch": 1.31, "eval_loss": 1.4201843738555908, "eval_runtime": 226.6701, "eval_samples_per_second": 441.17, "eval_steps_per_second": 13.787, "step": 140000 }, { "epoch": 1.32, "learning_rate": 3.681518740263884e-05, "loss": 1.6236, "step": 140500 }, { "epoch": 1.32, "eval_loss": 1.4394793510437012, "eval_runtime": 234.5172, "eval_samples_per_second": 426.408, "eval_steps_per_second": 13.325, "step": 140500 }, { "epoch": 1.32, "learning_rate": 3.676826636136709e-05, "loss": 1.6185, "step": 141000 }, { "epoch": 1.32, "eval_loss": 1.433275818824768, "eval_runtime": 234.8559, "eval_samples_per_second": 425.793, "eval_steps_per_second": 13.306, "step": 141000 }, { "epoch": 1.33, "learning_rate": 3.672134532009534e-05, "loss": 1.6061, "step": 141500 }, { "epoch": 1.33, "eval_loss": 1.418621301651001, "eval_runtime": 236.7519, "eval_samples_per_second": 422.383, "eval_steps_per_second": 13.199, "step": 141500 }, { "epoch": 1.33, "learning_rate": 3.6674424278823596e-05, "loss": 1.6104, "step": 142000 }, { "epoch": 1.33, "eval_loss": 1.4307106733322144, "eval_runtime": 235.4095, "eval_samples_per_second": 424.792, "eval_steps_per_second": 13.275, "step": 142000 }, { "epoch": 1.34, "learning_rate": 3.662750323755185e-05, "loss": 1.6079, "step": 142500 }, { "epoch": 1.34, "eval_loss": 1.4250682592391968, "eval_runtime": 235.4454, "eval_samples_per_second": 424.727, "eval_steps_per_second": 13.273, "step": 142500 }, { "epoch": 1.34, "learning_rate": 3.65805821962801e-05, "loss": 1.5905, "step": 143000 }, { "epoch": 1.34, "eval_loss": 1.4250717163085938, "eval_runtime": 235.7357, "eval_samples_per_second": 424.204, "eval_steps_per_second": 13.256, "step": 143000 }, { "epoch": 1.35, "learning_rate": 3.653366115500835e-05, "loss": 1.6081, "step": 143500 }, { "epoch": 1.35, "eval_loss": 1.4208674430847168, "eval_runtime": 234.1473, "eval_samples_per_second": 427.082, "eval_steps_per_second": 13.346, "step": 143500 }, { "epoch": 1.35, "learning_rate": 3.64867401137366e-05, "loss": 1.6312, "step": 144000 }, { "epoch": 1.35, "eval_loss": 1.4182296991348267, "eval_runtime": 235.783, "eval_samples_per_second": 424.119, "eval_steps_per_second": 13.254, "step": 144000 }, { "epoch": 1.36, "learning_rate": 3.6439819072464856e-05, "loss": 1.6118, "step": 144500 }, { "epoch": 1.36, "eval_loss": 1.4259034395217896, "eval_runtime": 235.3478, "eval_samples_per_second": 424.903, "eval_steps_per_second": 13.278, "step": 144500 }, { "epoch": 1.36, "learning_rate": 3.639289803119311e-05, "loss": 1.6056, "step": 145000 }, { "epoch": 1.36, "eval_loss": 1.4210768938064575, "eval_runtime": 233.3721, "eval_samples_per_second": 428.5, "eval_steps_per_second": 13.391, "step": 145000 }, { "epoch": 1.37, "learning_rate": 3.634597698992136e-05, "loss": 1.6058, "step": 145500 }, { "epoch": 1.37, "eval_loss": 1.4050568342208862, "eval_runtime": 231.2948, "eval_samples_per_second": 432.349, "eval_steps_per_second": 13.511, "step": 145500 }, { "epoch": 1.37, "learning_rate": 3.6299055948649616e-05, "loss": 1.6005, "step": 146000 }, { "epoch": 1.37, "eval_loss": 1.4116266965866089, "eval_runtime": 231.0158, "eval_samples_per_second": 432.871, "eval_steps_per_second": 13.527, "step": 146000 }, { "epoch": 1.37, "learning_rate": 3.625213490737787e-05, "loss": 1.5909, "step": 146500 }, { "epoch": 1.37, "eval_loss": 1.4132776260375977, "eval_runtime": 231.0824, "eval_samples_per_second": 432.746, "eval_steps_per_second": 13.523, "step": 146500 }, { "epoch": 1.38, "learning_rate": 3.6205213866106116e-05, "loss": 1.614, "step": 147000 }, { "epoch": 1.38, "eval_loss": 1.413307547569275, "eval_runtime": 222.5174, "eval_samples_per_second": 449.403, "eval_steps_per_second": 14.044, "step": 147000 }, { "epoch": 1.38, "learning_rate": 3.615829282483437e-05, "loss": 1.5907, "step": 147500 }, { "epoch": 1.38, "eval_loss": 1.4117494821548462, "eval_runtime": 222.5274, "eval_samples_per_second": 449.383, "eval_steps_per_second": 14.043, "step": 147500 }, { "epoch": 1.39, "learning_rate": 3.611137178356262e-05, "loss": 1.5882, "step": 148000 }, { "epoch": 1.39, "eval_loss": 1.4053359031677246, "eval_runtime": 222.505, "eval_samples_per_second": 449.428, "eval_steps_per_second": 14.045, "step": 148000 }, { "epoch": 1.39, "learning_rate": 3.6064450742290876e-05, "loss": 1.5864, "step": 148500 }, { "epoch": 1.39, "eval_loss": 1.3983163833618164, "eval_runtime": 222.439, "eval_samples_per_second": 449.561, "eval_steps_per_second": 14.049, "step": 148500 }, { "epoch": 1.4, "learning_rate": 3.601752970101913e-05, "loss": 1.5881, "step": 149000 }, { "epoch": 1.4, "eval_loss": 1.4097754955291748, "eval_runtime": 222.6731, "eval_samples_per_second": 449.089, "eval_steps_per_second": 14.034, "step": 149000 }, { "epoch": 1.4, "learning_rate": 3.597060865974738e-05, "loss": 1.5866, "step": 149500 }, { "epoch": 1.4, "eval_loss": 1.4093042612075806, "eval_runtime": 222.7781, "eval_samples_per_second": 448.877, "eval_steps_per_second": 14.027, "step": 149500 }, { "epoch": 1.41, "learning_rate": 3.5923687618475635e-05, "loss": 1.5835, "step": 150000 }, { "epoch": 1.41, "eval_loss": 1.4054986238479614, "eval_runtime": 228.6714, "eval_samples_per_second": 437.309, "eval_steps_per_second": 13.666, "step": 150000 }, { "epoch": 1.41, "learning_rate": 3.587676657720388e-05, "loss": 1.5792, "step": 150500 }, { "epoch": 1.41, "eval_loss": 1.40665864944458, "eval_runtime": 228.4732, "eval_samples_per_second": 437.688, "eval_steps_per_second": 13.678, "step": 150500 }, { "epoch": 1.42, "learning_rate": 3.5829845535932135e-05, "loss": 1.5667, "step": 151000 }, { "epoch": 1.42, "eval_loss": 1.4050201177597046, "eval_runtime": 228.2289, "eval_samples_per_second": 438.157, "eval_steps_per_second": 13.692, "step": 151000 }, { "epoch": 1.42, "learning_rate": 3.578292449466039e-05, "loss": 1.5771, "step": 151500 }, { "epoch": 1.42, "eval_loss": 1.4022421836853027, "eval_runtime": 233.937, "eval_samples_per_second": 427.466, "eval_steps_per_second": 13.358, "step": 151500 }, { "epoch": 1.43, "learning_rate": 3.573600345338864e-05, "loss": 1.5768, "step": 152000 }, { "epoch": 1.43, "eval_loss": 1.4057670831680298, "eval_runtime": 233.6648, "eval_samples_per_second": 427.963, "eval_steps_per_second": 13.374, "step": 152000 }, { "epoch": 1.43, "learning_rate": 3.5689082412116895e-05, "loss": 1.5752, "step": 152500 }, { "epoch": 1.43, "eval_loss": 1.403328537940979, "eval_runtime": 239.3336, "eval_samples_per_second": 417.827, "eval_steps_per_second": 13.057, "step": 152500 }, { "epoch": 1.44, "learning_rate": 3.564216137084515e-05, "loss": 1.5757, "step": 153000 }, { "epoch": 1.44, "eval_loss": 1.3900455236434937, "eval_runtime": 237.9128, "eval_samples_per_second": 420.322, "eval_steps_per_second": 13.135, "step": 153000 }, { "epoch": 1.44, "learning_rate": 3.5595240329573395e-05, "loss": 1.5689, "step": 153500 }, { "epoch": 1.44, "eval_loss": 1.3995317220687866, "eval_runtime": 239.4513, "eval_samples_per_second": 417.621, "eval_steps_per_second": 13.051, "step": 153500 }, { "epoch": 1.45, "learning_rate": 3.554831928830165e-05, "loss": 1.5896, "step": 154000 }, { "epoch": 1.45, "eval_loss": 1.3904423713684082, "eval_runtime": 236.1241, "eval_samples_per_second": 423.506, "eval_steps_per_second": 13.235, "step": 154000 }, { "epoch": 1.45, "learning_rate": 3.55013982470299e-05, "loss": 1.5916, "step": 154500 }, { "epoch": 1.45, "eval_loss": 1.400153636932373, "eval_runtime": 227.755, "eval_samples_per_second": 439.068, "eval_steps_per_second": 13.721, "step": 154500 }, { "epoch": 1.45, "learning_rate": 3.5454477205758155e-05, "loss": 1.5896, "step": 155000 }, { "epoch": 1.45, "eval_loss": 1.3952091932296753, "eval_runtime": 222.2884, "eval_samples_per_second": 449.866, "eval_steps_per_second": 14.058, "step": 155000 }, { "epoch": 1.46, "learning_rate": 3.540755616448641e-05, "loss": 1.5947, "step": 155500 }, { "epoch": 1.46, "eval_loss": 1.3903526067733765, "eval_runtime": 222.5604, "eval_samples_per_second": 449.316, "eval_steps_per_second": 14.041, "step": 155500 }, { "epoch": 1.46, "learning_rate": 3.536063512321466e-05, "loss": 1.5658, "step": 156000 }, { "epoch": 1.46, "eval_loss": 1.388243317604065, "eval_runtime": 222.4336, "eval_samples_per_second": 449.572, "eval_steps_per_second": 14.049, "step": 156000 }, { "epoch": 1.47, "learning_rate": 3.531371408194291e-05, "loss": 1.5711, "step": 156500 }, { "epoch": 1.47, "eval_loss": 1.3848968744277954, "eval_runtime": 222.2993, "eval_samples_per_second": 449.844, "eval_steps_per_second": 14.058, "step": 156500 }, { "epoch": 1.47, "learning_rate": 3.526679304067116e-05, "loss": 1.5877, "step": 157000 }, { "epoch": 1.47, "eval_loss": 1.3827555179595947, "eval_runtime": 222.1315, "eval_samples_per_second": 450.184, "eval_steps_per_second": 14.068, "step": 157000 }, { "epoch": 1.48, "learning_rate": 3.5219871999399414e-05, "loss": 1.5695, "step": 157500 }, { "epoch": 1.48, "eval_loss": 1.390235185623169, "eval_runtime": 222.0358, "eval_samples_per_second": 450.378, "eval_steps_per_second": 14.074, "step": 157500 }, { "epoch": 1.48, "learning_rate": 3.517295095812767e-05, "loss": 1.5652, "step": 158000 }, { "epoch": 1.48, "eval_loss": 1.3825620412826538, "eval_runtime": 221.9295, "eval_samples_per_second": 450.594, "eval_steps_per_second": 14.081, "step": 158000 }, { "epoch": 1.49, "learning_rate": 3.512602991685592e-05, "loss": 1.5696, "step": 158500 }, { "epoch": 1.49, "eval_loss": 1.3775691986083984, "eval_runtime": 221.9726, "eval_samples_per_second": 450.506, "eval_steps_per_second": 14.078, "step": 158500 }, { "epoch": 1.49, "learning_rate": 3.5079108875584174e-05, "loss": 1.5738, "step": 159000 }, { "epoch": 1.49, "eval_loss": 1.3838963508605957, "eval_runtime": 221.9255, "eval_samples_per_second": 450.602, "eval_steps_per_second": 14.081, "step": 159000 }, { "epoch": 1.5, "learning_rate": 3.503218783431242e-05, "loss": 1.5685, "step": 159500 }, { "epoch": 1.5, "eval_loss": 1.384406566619873, "eval_runtime": 221.8809, "eval_samples_per_second": 450.692, "eval_steps_per_second": 14.084, "step": 159500 }, { "epoch": 1.5, "learning_rate": 3.4985266793040674e-05, "loss": 1.5653, "step": 160000 }, { "epoch": 1.5, "eval_loss": 1.3828558921813965, "eval_runtime": 221.9019, "eval_samples_per_second": 450.65, "eval_steps_per_second": 14.083, "step": 160000 }, { "epoch": 1.51, "learning_rate": 3.493834575176893e-05, "loss": 1.5865, "step": 160500 }, { "epoch": 1.51, "eval_loss": 1.385338306427002, "eval_runtime": 221.7161, "eval_samples_per_second": 451.027, "eval_steps_per_second": 14.095, "step": 160500 }, { "epoch": 1.51, "learning_rate": 3.489142471049718e-05, "loss": 1.5501, "step": 161000 }, { "epoch": 1.51, "eval_loss": 1.3802553415298462, "eval_runtime": 221.7084, "eval_samples_per_second": 451.043, "eval_steps_per_second": 14.095, "step": 161000 }, { "epoch": 1.52, "learning_rate": 3.4844503669225434e-05, "loss": 1.5556, "step": 161500 }, { "epoch": 1.52, "eval_loss": 1.3760793209075928, "eval_runtime": 221.6941, "eval_samples_per_second": 451.072, "eval_steps_per_second": 14.096, "step": 161500 }, { "epoch": 1.52, "learning_rate": 3.479758262795368e-05, "loss": 1.5857, "step": 162000 }, { "epoch": 1.52, "eval_loss": 1.38301420211792, "eval_runtime": 221.6934, "eval_samples_per_second": 451.073, "eval_steps_per_second": 14.096, "step": 162000 }, { "epoch": 1.52, "learning_rate": 3.4750661586681934e-05, "loss": 1.5549, "step": 162500 }, { "epoch": 1.52, "eval_loss": 1.3752098083496094, "eval_runtime": 221.8522, "eval_samples_per_second": 450.751, "eval_steps_per_second": 14.086, "step": 162500 }, { "epoch": 1.53, "learning_rate": 3.470374054541019e-05, "loss": 1.5498, "step": 163000 }, { "epoch": 1.53, "eval_loss": 1.378463625907898, "eval_runtime": 221.8169, "eval_samples_per_second": 450.822, "eval_steps_per_second": 14.088, "step": 163000 }, { "epoch": 1.53, "learning_rate": 3.465681950413844e-05, "loss": 1.5719, "step": 163500 }, { "epoch": 1.53, "eval_loss": 1.3748183250427246, "eval_runtime": 221.783, "eval_samples_per_second": 450.891, "eval_steps_per_second": 14.09, "step": 163500 }, { "epoch": 1.54, "learning_rate": 3.4609898462866694e-05, "loss": 1.5751, "step": 164000 }, { "epoch": 1.54, "eval_loss": 1.3731558322906494, "eval_runtime": 221.7643, "eval_samples_per_second": 450.929, "eval_steps_per_second": 14.092, "step": 164000 }, { "epoch": 1.54, "learning_rate": 3.456297742159495e-05, "loss": 1.5459, "step": 164500 }, { "epoch": 1.54, "eval_loss": 1.3722800016403198, "eval_runtime": 221.6317, "eval_samples_per_second": 451.199, "eval_steps_per_second": 14.1, "step": 164500 }, { "epoch": 1.55, "learning_rate": 3.451605638032319e-05, "loss": 1.5603, "step": 165000 }, { "epoch": 1.55, "eval_loss": 1.366198182106018, "eval_runtime": 221.6334, "eval_samples_per_second": 451.196, "eval_steps_per_second": 14.1, "step": 165000 }, { "epoch": 1.55, "learning_rate": 3.446913533905145e-05, "loss": 1.5479, "step": 165500 }, { "epoch": 1.55, "eval_loss": 1.3646401166915894, "eval_runtime": 221.6561, "eval_samples_per_second": 451.149, "eval_steps_per_second": 14.098, "step": 165500 }, { "epoch": 1.56, "learning_rate": 3.44222142977797e-05, "loss": 1.5386, "step": 166000 }, { "epoch": 1.56, "eval_loss": 1.3601208925247192, "eval_runtime": 221.6415, "eval_samples_per_second": 451.179, "eval_steps_per_second": 14.099, "step": 166000 }, { "epoch": 1.56, "learning_rate": 3.437529325650795e-05, "loss": 1.5442, "step": 166500 }, { "epoch": 1.56, "eval_loss": 1.357480764389038, "eval_runtime": 221.6464, "eval_samples_per_second": 451.169, "eval_steps_per_second": 14.099, "step": 166500 }, { "epoch": 1.57, "learning_rate": 3.4328372215236207e-05, "loss": 1.5476, "step": 167000 }, { "epoch": 1.57, "eval_loss": 1.3579912185668945, "eval_runtime": 221.645, "eval_samples_per_second": 451.172, "eval_steps_per_second": 14.099, "step": 167000 }, { "epoch": 1.57, "learning_rate": 3.428145117396445e-05, "loss": 1.538, "step": 167500 }, { "epoch": 1.57, "eval_loss": 1.3654874563217163, "eval_runtime": 221.6529, "eval_samples_per_second": 451.156, "eval_steps_per_second": 14.099, "step": 167500 }, { "epoch": 1.58, "learning_rate": 3.4234530132692706e-05, "loss": 1.5531, "step": 168000 }, { "epoch": 1.58, "eval_loss": 1.3746790885925293, "eval_runtime": 221.6568, "eval_samples_per_second": 451.148, "eval_steps_per_second": 14.098, "step": 168000 }, { "epoch": 1.58, "learning_rate": 3.418760909142096e-05, "loss": 1.5538, "step": 168500 }, { "epoch": 1.58, "eval_loss": 1.3619598150253296, "eval_runtime": 221.6175, "eval_samples_per_second": 451.228, "eval_steps_per_second": 14.101, "step": 168500 }, { "epoch": 1.59, "learning_rate": 3.414068805014921e-05, "loss": 1.5497, "step": 169000 }, { "epoch": 1.59, "eval_loss": 1.3550140857696533, "eval_runtime": 221.6445, "eval_samples_per_second": 451.173, "eval_steps_per_second": 14.099, "step": 169000 }, { "epoch": 1.59, "learning_rate": 3.4093767008877466e-05, "loss": 1.5373, "step": 169500 }, { "epoch": 1.59, "eval_loss": 1.3558791875839233, "eval_runtime": 221.8117, "eval_samples_per_second": 450.833, "eval_steps_per_second": 14.089, "step": 169500 }, { "epoch": 1.6, "learning_rate": 3.404684596760572e-05, "loss": 1.551, "step": 170000 }, { "epoch": 1.6, "eval_loss": 1.3610763549804688, "eval_runtime": 221.8143, "eval_samples_per_second": 450.828, "eval_steps_per_second": 14.088, "step": 170000 }, { "epoch": 1.6, "learning_rate": 3.3999924926333966e-05, "loss": 1.5348, "step": 170500 }, { "epoch": 1.6, "eval_loss": 1.3658798933029175, "eval_runtime": 221.7237, "eval_samples_per_second": 451.012, "eval_steps_per_second": 14.094, "step": 170500 }, { "epoch": 1.6, "learning_rate": 3.395300388506222e-05, "loss": 1.5391, "step": 171000 }, { "epoch": 1.6, "eval_loss": 1.3553804159164429, "eval_runtime": 221.7256, "eval_samples_per_second": 451.008, "eval_steps_per_second": 14.094, "step": 171000 }, { "epoch": 1.61, "learning_rate": 3.390608284379047e-05, "loss": 1.5268, "step": 171500 }, { "epoch": 1.61, "eval_loss": 1.3583581447601318, "eval_runtime": 221.5416, "eval_samples_per_second": 451.383, "eval_steps_per_second": 14.106, "step": 171500 }, { "epoch": 1.61, "learning_rate": 3.3859161802518726e-05, "loss": 1.5377, "step": 172000 }, { "epoch": 1.61, "eval_loss": 1.3604706525802612, "eval_runtime": 221.5467, "eval_samples_per_second": 451.372, "eval_steps_per_second": 14.105, "step": 172000 }, { "epoch": 1.62, "learning_rate": 3.381224076124698e-05, "loss": 1.5201, "step": 172500 }, { "epoch": 1.62, "eval_loss": 1.3594732284545898, "eval_runtime": 221.5767, "eval_samples_per_second": 451.311, "eval_steps_per_second": 14.103, "step": 172500 }, { "epoch": 1.62, "learning_rate": 3.3765319719975226e-05, "loss": 1.5475, "step": 173000 }, { "epoch": 1.62, "eval_loss": 1.3577581644058228, "eval_runtime": 221.5721, "eval_samples_per_second": 451.32, "eval_steps_per_second": 14.104, "step": 173000 }, { "epoch": 1.63, "learning_rate": 3.371839867870348e-05, "loss": 1.5442, "step": 173500 }, { "epoch": 1.63, "eval_loss": 1.3556467294692993, "eval_runtime": 221.5702, "eval_samples_per_second": 451.324, "eval_steps_per_second": 14.104, "step": 173500 }, { "epoch": 1.63, "learning_rate": 3.367147763743173e-05, "loss": 1.5487, "step": 174000 }, { "epoch": 1.63, "eval_loss": 1.35114324092865, "eval_runtime": 221.5518, "eval_samples_per_second": 451.362, "eval_steps_per_second": 14.105, "step": 174000 }, { "epoch": 1.64, "learning_rate": 3.3624556596159985e-05, "loss": 1.5282, "step": 174500 }, { "epoch": 1.64, "eval_loss": 1.3505486249923706, "eval_runtime": 221.5706, "eval_samples_per_second": 451.323, "eval_steps_per_second": 14.104, "step": 174500 }, { "epoch": 1.64, "learning_rate": 3.357763555488824e-05, "loss": 1.5335, "step": 175000 }, { "epoch": 1.64, "eval_loss": 1.3498034477233887, "eval_runtime": 221.5638, "eval_samples_per_second": 451.337, "eval_steps_per_second": 14.104, "step": 175000 }, { "epoch": 1.65, "learning_rate": 3.353071451361649e-05, "loss": 1.5217, "step": 175500 }, { "epoch": 1.65, "eval_loss": 1.3372061252593994, "eval_runtime": 221.6104, "eval_samples_per_second": 451.242, "eval_steps_per_second": 14.101, "step": 175500 }, { "epoch": 1.65, "learning_rate": 3.348379347234474e-05, "loss": 1.5374, "step": 176000 }, { "epoch": 1.65, "eval_loss": 1.3543497323989868, "eval_runtime": 221.5921, "eval_samples_per_second": 451.28, "eval_steps_per_second": 14.102, "step": 176000 }, { "epoch": 1.66, "learning_rate": 3.343687243107299e-05, "loss": 1.5282, "step": 176500 }, { "epoch": 1.66, "eval_loss": 1.3469191789627075, "eval_runtime": 221.5417, "eval_samples_per_second": 451.382, "eval_steps_per_second": 14.106, "step": 176500 }, { "epoch": 1.66, "learning_rate": 3.3389951389801245e-05, "loss": 1.5135, "step": 177000 }, { "epoch": 1.66, "eval_loss": 1.3495582342147827, "eval_runtime": 221.7135, "eval_samples_per_second": 451.033, "eval_steps_per_second": 14.095, "step": 177000 }, { "epoch": 1.67, "learning_rate": 3.33430303485295e-05, "loss": 1.5604, "step": 177500 }, { "epoch": 1.67, "eval_loss": 1.3418365716934204, "eval_runtime": 221.6933, "eval_samples_per_second": 451.074, "eval_steps_per_second": 14.096, "step": 177500 }, { "epoch": 1.67, "learning_rate": 3.329610930725775e-05, "loss": 1.5222, "step": 178000 }, { "epoch": 1.67, "eval_loss": 1.3411223888397217, "eval_runtime": 221.6176, "eval_samples_per_second": 451.228, "eval_steps_per_second": 14.101, "step": 178000 }, { "epoch": 1.68, "learning_rate": 3.3249188265986e-05, "loss": 1.5207, "step": 178500 }, { "epoch": 1.68, "eval_loss": 1.3367658853530884, "eval_runtime": 221.5787, "eval_samples_per_second": 451.307, "eval_steps_per_second": 14.103, "step": 178500 }, { "epoch": 1.68, "learning_rate": 3.320226722471425e-05, "loss": 1.5232, "step": 179000 }, { "epoch": 1.68, "eval_loss": 1.3388681411743164, "eval_runtime": 221.3977, "eval_samples_per_second": 451.676, "eval_steps_per_second": 14.115, "step": 179000 }, { "epoch": 1.68, "learning_rate": 3.3155346183442505e-05, "loss": 1.5397, "step": 179500 }, { "epoch": 1.68, "eval_loss": 1.3412151336669922, "eval_runtime": 221.3909, "eval_samples_per_second": 451.69, "eval_steps_per_second": 14.115, "step": 179500 }, { "epoch": 1.69, "learning_rate": 3.310842514217076e-05, "loss": 1.5368, "step": 180000 }, { "epoch": 1.69, "eval_loss": 1.3428925275802612, "eval_runtime": 221.3914, "eval_samples_per_second": 451.689, "eval_steps_per_second": 14.115, "step": 180000 }, { "epoch": 1.69, "learning_rate": 3.306150410089901e-05, "loss": 1.5595, "step": 180500 }, { "epoch": 1.69, "eval_loss": 1.336493730545044, "eval_runtime": 221.2471, "eval_samples_per_second": 451.983, "eval_steps_per_second": 14.124, "step": 180500 }, { "epoch": 1.7, "learning_rate": 3.3014583059627265e-05, "loss": 1.5407, "step": 181000 }, { "epoch": 1.7, "eval_loss": 1.3366632461547852, "eval_runtime": 221.2531, "eval_samples_per_second": 451.971, "eval_steps_per_second": 14.124, "step": 181000 }, { "epoch": 1.7, "learning_rate": 3.296766201835551e-05, "loss": 1.5289, "step": 181500 }, { "epoch": 1.7, "eval_loss": 1.3385401964187622, "eval_runtime": 221.3851, "eval_samples_per_second": 451.702, "eval_steps_per_second": 14.116, "step": 181500 }, { "epoch": 1.71, "learning_rate": 3.2920740977083764e-05, "loss": 1.5259, "step": 182000 }, { "epoch": 1.71, "eval_loss": 1.3364156484603882, "eval_runtime": 221.3895, "eval_samples_per_second": 451.693, "eval_steps_per_second": 14.115, "step": 182000 }, { "epoch": 1.71, "learning_rate": 3.287381993581202e-05, "loss": 1.5351, "step": 182500 }, { "epoch": 1.71, "eval_loss": 1.3378472328186035, "eval_runtime": 221.3747, "eval_samples_per_second": 451.723, "eval_steps_per_second": 14.116, "step": 182500 }, { "epoch": 1.72, "learning_rate": 3.282689889454027e-05, "loss": 1.5167, "step": 183000 }, { "epoch": 1.72, "eval_loss": 1.3326358795166016, "eval_runtime": 221.3651, "eval_samples_per_second": 451.742, "eval_steps_per_second": 14.117, "step": 183000 }, { "epoch": 1.72, "learning_rate": 3.2779977853268524e-05, "loss": 1.5312, "step": 183500 }, { "epoch": 1.72, "eval_loss": 1.3356856107711792, "eval_runtime": 221.3292, "eval_samples_per_second": 451.816, "eval_steps_per_second": 14.119, "step": 183500 }, { "epoch": 1.73, "learning_rate": 3.273305681199677e-05, "loss": 1.5343, "step": 184000 }, { "epoch": 1.73, "eval_loss": 1.332999348640442, "eval_runtime": 221.3046, "eval_samples_per_second": 451.866, "eval_steps_per_second": 14.121, "step": 184000 }, { "epoch": 1.73, "learning_rate": 3.2686135770725024e-05, "loss": 1.532, "step": 184500 }, { "epoch": 1.73, "eval_loss": 1.3290119171142578, "eval_runtime": 221.4415, "eval_samples_per_second": 451.586, "eval_steps_per_second": 14.112, "step": 184500 }, { "epoch": 1.74, "learning_rate": 3.263921472945328e-05, "loss": 1.5106, "step": 185000 }, { "epoch": 1.74, "eval_loss": 1.3379125595092773, "eval_runtime": 221.5598, "eval_samples_per_second": 451.345, "eval_steps_per_second": 14.105, "step": 185000 }, { "epoch": 1.74, "learning_rate": 3.259229368818153e-05, "loss": 1.5146, "step": 185500 }, { "epoch": 1.74, "eval_loss": 1.3202601671218872, "eval_runtime": 221.5182, "eval_samples_per_second": 451.43, "eval_steps_per_second": 14.107, "step": 185500 }, { "epoch": 1.75, "learning_rate": 3.2545372646909784e-05, "loss": 1.5059, "step": 186000 }, { "epoch": 1.75, "eval_loss": 1.3281402587890625, "eval_runtime": 221.4506, "eval_samples_per_second": 451.568, "eval_steps_per_second": 14.111, "step": 186000 }, { "epoch": 1.75, "learning_rate": 3.249845160563804e-05, "loss": 1.5206, "step": 186500 }, { "epoch": 1.75, "eval_loss": 1.3232953548431396, "eval_runtime": 221.1927, "eval_samples_per_second": 452.095, "eval_steps_per_second": 14.128, "step": 186500 }, { "epoch": 1.75, "learning_rate": 3.2451530564366284e-05, "loss": 1.5332, "step": 187000 }, { "epoch": 1.75, "eval_loss": 1.328766107559204, "eval_runtime": 221.1463, "eval_samples_per_second": 452.189, "eval_steps_per_second": 14.131, "step": 187000 }, { "epoch": 1.76, "learning_rate": 3.240460952309454e-05, "loss": 1.5253, "step": 187500 }, { "epoch": 1.76, "eval_loss": 1.3251526355743408, "eval_runtime": 221.188, "eval_samples_per_second": 452.104, "eval_steps_per_second": 14.128, "step": 187500 }, { "epoch": 1.76, "learning_rate": 3.235768848182279e-05, "loss": 1.5412, "step": 188000 }, { "epoch": 1.76, "eval_loss": 1.3271182775497437, "eval_runtime": 221.1601, "eval_samples_per_second": 452.161, "eval_steps_per_second": 14.13, "step": 188000 }, { "epoch": 1.77, "learning_rate": 3.2310767440551044e-05, "loss": 1.5102, "step": 188500 }, { "epoch": 1.77, "eval_loss": 1.32902193069458, "eval_runtime": 221.169, "eval_samples_per_second": 452.143, "eval_steps_per_second": 14.129, "step": 188500 }, { "epoch": 1.77, "learning_rate": 3.22638463992793e-05, "loss": 1.5158, "step": 189000 }, { "epoch": 1.77, "eval_loss": 1.3175742626190186, "eval_runtime": 221.1728, "eval_samples_per_second": 452.135, "eval_steps_per_second": 14.129, "step": 189000 }, { "epoch": 1.78, "learning_rate": 3.221692535800755e-05, "loss": 1.5015, "step": 189500 }, { "epoch": 1.78, "eval_loss": 1.3242403268814087, "eval_runtime": 221.1038, "eval_samples_per_second": 452.276, "eval_steps_per_second": 14.134, "step": 189500 }, { "epoch": 1.78, "learning_rate": 3.21700043167358e-05, "loss": 1.5226, "step": 190000 }, { "epoch": 1.78, "eval_loss": 1.3241665363311768, "eval_runtime": 221.0819, "eval_samples_per_second": 452.321, "eval_steps_per_second": 14.135, "step": 190000 }, { "epoch": 1.79, "learning_rate": 3.212308327546405e-05, "loss": 1.5164, "step": 190500 }, { "epoch": 1.79, "eval_loss": 1.3255438804626465, "eval_runtime": 221.0427, "eval_samples_per_second": 452.401, "eval_steps_per_second": 14.138, "step": 190500 }, { "epoch": 1.79, "learning_rate": 3.20761622341923e-05, "loss": 1.4973, "step": 191000 }, { "epoch": 1.79, "eval_loss": 1.317428469657898, "eval_runtime": 221.0182, "eval_samples_per_second": 452.451, "eval_steps_per_second": 14.139, "step": 191000 }, { "epoch": 1.8, "learning_rate": 3.2029241192920556e-05, "loss": 1.5136, "step": 191500 }, { "epoch": 1.8, "eval_loss": 1.3230990171432495, "eval_runtime": 221.0488, "eval_samples_per_second": 452.389, "eval_steps_per_second": 14.137, "step": 191500 }, { "epoch": 1.8, "learning_rate": 3.198232015164881e-05, "loss": 1.4964, "step": 192000 }, { "epoch": 1.8, "eval_loss": 1.3161015510559082, "eval_runtime": 221.0504, "eval_samples_per_second": 452.385, "eval_steps_per_second": 14.137, "step": 192000 }, { "epoch": 1.81, "learning_rate": 3.1935399110377056e-05, "loss": 1.5111, "step": 192500 }, { "epoch": 1.81, "eval_loss": 1.3233813047409058, "eval_runtime": 221.2963, "eval_samples_per_second": 451.883, "eval_steps_per_second": 14.121, "step": 192500 }, { "epoch": 1.81, "learning_rate": 3.188847806910531e-05, "loss": 1.4848, "step": 193000 }, { "epoch": 1.81, "eval_loss": 1.3204160928726196, "eval_runtime": 221.2711, "eval_samples_per_second": 451.934, "eval_steps_per_second": 14.123, "step": 193000 }, { "epoch": 1.82, "learning_rate": 3.184155702783356e-05, "loss": 1.5071, "step": 193500 }, { "epoch": 1.82, "eval_loss": 1.3207734823226929, "eval_runtime": 221.1959, "eval_samples_per_second": 452.088, "eval_steps_per_second": 14.128, "step": 193500 }, { "epoch": 1.82, "learning_rate": 3.1794635986561816e-05, "loss": 1.505, "step": 194000 }, { "epoch": 1.82, "eval_loss": 1.3126252889633179, "eval_runtime": 221.1627, "eval_samples_per_second": 452.156, "eval_steps_per_second": 14.13, "step": 194000 }, { "epoch": 1.83, "learning_rate": 3.174771494529007e-05, "loss": 1.5166, "step": 194500 }, { "epoch": 1.83, "eval_loss": 1.3139557838439941, "eval_runtime": 221.0163, "eval_samples_per_second": 452.455, "eval_steps_per_second": 14.139, "step": 194500 }, { "epoch": 1.83, "learning_rate": 3.170079390401832e-05, "loss": 1.5, "step": 195000 }, { "epoch": 1.83, "eval_loss": 1.3125284910202026, "eval_runtime": 221.0368, "eval_samples_per_second": 452.413, "eval_steps_per_second": 14.138, "step": 195000 }, { "epoch": 1.83, "learning_rate": 3.165387286274657e-05, "loss": 1.5055, "step": 195500 }, { "epoch": 1.83, "eval_loss": 1.3205658197402954, "eval_runtime": 221.047, "eval_samples_per_second": 452.392, "eval_steps_per_second": 14.137, "step": 195500 }, { "epoch": 1.84, "learning_rate": 3.160695182147482e-05, "loss": 1.4874, "step": 196000 }, { "epoch": 1.84, "eval_loss": 1.3127025365829468, "eval_runtime": 221.0478, "eval_samples_per_second": 452.391, "eval_steps_per_second": 14.137, "step": 196000 }, { "epoch": 1.84, "learning_rate": 3.1560030780203076e-05, "loss": 1.4879, "step": 196500 }, { "epoch": 1.84, "eval_loss": 1.3064494132995605, "eval_runtime": 221.0498, "eval_samples_per_second": 452.387, "eval_steps_per_second": 14.137, "step": 196500 }, { "epoch": 1.85, "learning_rate": 3.151310973893133e-05, "loss": 1.4988, "step": 197000 }, { "epoch": 1.85, "eval_loss": 1.3118481636047363, "eval_runtime": 221.0593, "eval_samples_per_second": 452.367, "eval_steps_per_second": 14.136, "step": 197000 }, { "epoch": 1.85, "learning_rate": 3.146618869765958e-05, "loss": 1.5136, "step": 197500 }, { "epoch": 1.85, "eval_loss": 1.3082212209701538, "eval_runtime": 221.0316, "eval_samples_per_second": 452.424, "eval_steps_per_second": 14.138, "step": 197500 }, { "epoch": 1.86, "learning_rate": 3.141926765638783e-05, "loss": 1.4772, "step": 198000 }, { "epoch": 1.86, "eval_loss": 1.3054986000061035, "eval_runtime": 221.0487, "eval_samples_per_second": 452.389, "eval_steps_per_second": 14.137, "step": 198000 }, { "epoch": 1.86, "learning_rate": 3.137234661511608e-05, "loss": 1.4835, "step": 198500 }, { "epoch": 1.86, "eval_loss": 1.3038787841796875, "eval_runtime": 221.0318, "eval_samples_per_second": 452.424, "eval_steps_per_second": 14.138, "step": 198500 }, { "epoch": 1.87, "learning_rate": 3.1325425573844335e-05, "loss": 1.4953, "step": 199000 }, { "epoch": 1.87, "eval_loss": 1.3007208108901978, "eval_runtime": 221.0384, "eval_samples_per_second": 452.41, "eval_steps_per_second": 14.138, "step": 199000 }, { "epoch": 1.87, "learning_rate": 3.127850453257259e-05, "loss": 1.488, "step": 199500 }, { "epoch": 1.87, "eval_loss": 1.301313042640686, "eval_runtime": 221.0649, "eval_samples_per_second": 452.356, "eval_steps_per_second": 14.136, "step": 199500 }, { "epoch": 1.88, "learning_rate": 3.123158349130084e-05, "loss": 1.4808, "step": 200000 }, { "epoch": 1.88, "eval_loss": 1.307305097579956, "eval_runtime": 221.0452, "eval_samples_per_second": 452.396, "eval_steps_per_second": 14.137, "step": 200000 }, { "epoch": 1.88, "learning_rate": 3.1184662450029095e-05, "loss": 1.49, "step": 200500 }, { "epoch": 1.88, "eval_loss": 1.3078263998031616, "eval_runtime": 220.9228, "eval_samples_per_second": 452.647, "eval_steps_per_second": 14.145, "step": 200500 }, { "epoch": 1.89, "learning_rate": 3.113774140875734e-05, "loss": 1.4568, "step": 201000 }, { "epoch": 1.89, "eval_loss": 1.3042628765106201, "eval_runtime": 221.0985, "eval_samples_per_second": 452.287, "eval_steps_per_second": 14.134, "step": 201000 }, { "epoch": 1.89, "learning_rate": 3.1090820367485595e-05, "loss": 1.4818, "step": 201500 }, { "epoch": 1.89, "eval_loss": 1.3147982358932495, "eval_runtime": 221.0384, "eval_samples_per_second": 452.41, "eval_steps_per_second": 14.138, "step": 201500 }, { "epoch": 1.9, "learning_rate": 3.104389932621385e-05, "loss": 1.4693, "step": 202000 }, { "epoch": 1.9, "eval_loss": 1.2982258796691895, "eval_runtime": 220.9646, "eval_samples_per_second": 452.561, "eval_steps_per_second": 14.143, "step": 202000 }, { "epoch": 1.9, "learning_rate": 3.09969782849421e-05, "loss": 1.4645, "step": 202500 }, { "epoch": 1.9, "eval_loss": 1.3004374504089355, "eval_runtime": 220.9286, "eval_samples_per_second": 452.635, "eval_steps_per_second": 14.145, "step": 202500 }, { "epoch": 1.9, "learning_rate": 3.0950057243670355e-05, "loss": 1.4802, "step": 203000 }, { "epoch": 1.9, "eval_loss": 1.2999825477600098, "eval_runtime": 220.8399, "eval_samples_per_second": 452.817, "eval_steps_per_second": 14.151, "step": 203000 }, { "epoch": 1.91, "learning_rate": 3.09031362023986e-05, "loss": 1.4797, "step": 203500 }, { "epoch": 1.91, "eval_loss": 1.2966198921203613, "eval_runtime": 220.8339, "eval_samples_per_second": 452.829, "eval_steps_per_second": 14.151, "step": 203500 }, { "epoch": 1.91, "learning_rate": 3.0856215161126855e-05, "loss": 1.4995, "step": 204000 }, { "epoch": 1.91, "eval_loss": 1.2981046438217163, "eval_runtime": 220.814, "eval_samples_per_second": 452.87, "eval_steps_per_second": 14.152, "step": 204000 }, { "epoch": 1.92, "learning_rate": 3.080929411985511e-05, "loss": 1.5087, "step": 204500 }, { "epoch": 1.92, "eval_loss": 1.2951112985610962, "eval_runtime": 220.8383, "eval_samples_per_second": 452.82, "eval_steps_per_second": 14.151, "step": 204500 }, { "epoch": 1.92, "learning_rate": 3.076237307858336e-05, "loss": 1.4984, "step": 205000 }, { "epoch": 1.92, "eval_loss": 1.2964484691619873, "eval_runtime": 220.8897, "eval_samples_per_second": 452.715, "eval_steps_per_second": 14.147, "step": 205000 }, { "epoch": 1.93, "learning_rate": 3.0715452037311615e-05, "loss": 1.4864, "step": 205500 }, { "epoch": 1.93, "eval_loss": 1.2925612926483154, "eval_runtime": 220.8427, "eval_samples_per_second": 452.811, "eval_steps_per_second": 14.15, "step": 205500 }, { "epoch": 1.93, "learning_rate": 3.066853099603987e-05, "loss": 1.4752, "step": 206000 }, { "epoch": 1.93, "eval_loss": 1.3018134832382202, "eval_runtime": 220.8565, "eval_samples_per_second": 452.783, "eval_steps_per_second": 14.149, "step": 206000 }, { "epoch": 1.94, "learning_rate": 3.0621609954768114e-05, "loss": 1.5018, "step": 206500 }, { "epoch": 1.94, "eval_loss": 1.2949315309524536, "eval_runtime": 220.8631, "eval_samples_per_second": 452.769, "eval_steps_per_second": 14.149, "step": 206500 }, { "epoch": 1.94, "learning_rate": 3.057468891349637e-05, "loss": 1.4902, "step": 207000 }, { "epoch": 1.94, "eval_loss": 1.2878872156143188, "eval_runtime": 220.8333, "eval_samples_per_second": 452.83, "eval_steps_per_second": 14.151, "step": 207000 }, { "epoch": 1.95, "learning_rate": 3.052776787222462e-05, "loss": 1.4699, "step": 207500 }, { "epoch": 1.95, "eval_loss": 1.2887647151947021, "eval_runtime": 220.8064, "eval_samples_per_second": 452.885, "eval_steps_per_second": 14.153, "step": 207500 }, { "epoch": 1.95, "learning_rate": 3.0480846830952874e-05, "loss": 1.4834, "step": 208000 }, { "epoch": 1.95, "eval_loss": 1.290585994720459, "eval_runtime": 220.7934, "eval_samples_per_second": 452.912, "eval_steps_per_second": 14.154, "step": 208000 }, { "epoch": 1.96, "learning_rate": 3.0433925789681128e-05, "loss": 1.4561, "step": 208500 }, { "epoch": 1.96, "eval_loss": 1.297606110572815, "eval_runtime": 220.8078, "eval_samples_per_second": 452.882, "eval_steps_per_second": 14.153, "step": 208500 }, { "epoch": 1.96, "learning_rate": 3.0387004748409374e-05, "loss": 1.4673, "step": 209000 }, { "epoch": 1.96, "eval_loss": 1.2891823053359985, "eval_runtime": 220.802, "eval_samples_per_second": 452.895, "eval_steps_per_second": 14.153, "step": 209000 }, { "epoch": 1.97, "learning_rate": 3.0340083707137627e-05, "loss": 1.4872, "step": 209500 }, { "epoch": 1.97, "eval_loss": 1.2896424531936646, "eval_runtime": 220.9571, "eval_samples_per_second": 452.577, "eval_steps_per_second": 14.143, "step": 209500 }, { "epoch": 1.97, "learning_rate": 3.029316266586588e-05, "loss": 1.4538, "step": 210000 }, { "epoch": 1.97, "eval_loss": 1.2892200946807861, "eval_runtime": 220.9468, "eval_samples_per_second": 452.598, "eval_steps_per_second": 14.144, "step": 210000 }, { "epoch": 1.98, "learning_rate": 3.0246241624594134e-05, "loss": 1.4672, "step": 210500 }, { "epoch": 1.98, "eval_loss": 1.2804533243179321, "eval_runtime": 220.8886, "eval_samples_per_second": 452.717, "eval_steps_per_second": 14.147, "step": 210500 }, { "epoch": 1.98, "learning_rate": 3.0199320583322387e-05, "loss": 1.46, "step": 211000 }, { "epoch": 1.98, "eval_loss": 1.292427659034729, "eval_runtime": 220.8568, "eval_samples_per_second": 452.782, "eval_steps_per_second": 14.149, "step": 211000 }, { "epoch": 1.98, "learning_rate": 3.015239954205064e-05, "loss": 1.4597, "step": 211500 }, { "epoch": 1.98, "eval_loss": 1.2845008373260498, "eval_runtime": 220.714, "eval_samples_per_second": 453.075, "eval_steps_per_second": 14.159, "step": 211500 }, { "epoch": 1.99, "learning_rate": 3.0105478500778887e-05, "loss": 1.4603, "step": 212000 }, { "epoch": 1.99, "eval_loss": 1.2904783487319946, "eval_runtime": 220.7365, "eval_samples_per_second": 453.029, "eval_steps_per_second": 14.157, "step": 212000 }, { "epoch": 1.99, "learning_rate": 3.005855745950714e-05, "loss": 1.4812, "step": 212500 }, { "epoch": 1.99, "eval_loss": 1.2882287502288818, "eval_runtime": 220.7381, "eval_samples_per_second": 453.025, "eval_steps_per_second": 14.157, "step": 212500 }, { "epoch": 2.0, "learning_rate": 3.0011636418235394e-05, "loss": 1.4662, "step": 213000 }, { "epoch": 2.0, "eval_loss": 1.277458667755127, "eval_runtime": 220.7153, "eval_samples_per_second": 453.072, "eval_steps_per_second": 14.159, "step": 213000 }, { "epoch": 2.0, "learning_rate": 2.9964715376963647e-05, "loss": 1.4622, "step": 213500 }, { "epoch": 2.0, "eval_loss": 1.287771463394165, "eval_runtime": 220.7913, "eval_samples_per_second": 452.916, "eval_steps_per_second": 14.154, "step": 213500 }, { "epoch": 2.01, "learning_rate": 2.99177943356919e-05, "loss": 1.4618, "step": 214000 }, { "epoch": 2.01, "eval_loss": 1.2776265144348145, "eval_runtime": 220.7076, "eval_samples_per_second": 453.088, "eval_steps_per_second": 14.159, "step": 214000 }, { "epoch": 2.01, "learning_rate": 2.987087329442015e-05, "loss": 1.4462, "step": 214500 }, { "epoch": 2.01, "eval_loss": 1.282547116279602, "eval_runtime": 220.7335, "eval_samples_per_second": 453.035, "eval_steps_per_second": 14.157, "step": 214500 }, { "epoch": 2.02, "learning_rate": 2.9823952253148403e-05, "loss": 1.4502, "step": 215000 }, { "epoch": 2.02, "eval_loss": 1.2854583263397217, "eval_runtime": 220.783, "eval_samples_per_second": 452.933, "eval_steps_per_second": 14.154, "step": 215000 }, { "epoch": 2.02, "learning_rate": 2.9777031211876653e-05, "loss": 1.4419, "step": 215500 }, { "epoch": 2.02, "eval_loss": 1.282270908355713, "eval_runtime": 220.7936, "eval_samples_per_second": 452.912, "eval_steps_per_second": 14.153, "step": 215500 }, { "epoch": 2.03, "learning_rate": 2.9730110170604906e-05, "loss": 1.4476, "step": 216000 }, { "epoch": 2.03, "eval_loss": 1.273934006690979, "eval_runtime": 220.778, "eval_samples_per_second": 452.944, "eval_steps_per_second": 14.154, "step": 216000 }, { "epoch": 2.03, "learning_rate": 2.968318912933316e-05, "loss": 1.4549, "step": 216500 }, { "epoch": 2.03, "eval_loss": 1.2787511348724365, "eval_runtime": 220.7612, "eval_samples_per_second": 452.978, "eval_steps_per_second": 14.156, "step": 216500 }, { "epoch": 2.04, "learning_rate": 2.9636268088061413e-05, "loss": 1.4614, "step": 217000 }, { "epoch": 2.04, "eval_loss": 1.2720485925674438, "eval_runtime": 220.7698, "eval_samples_per_second": 452.96, "eval_steps_per_second": 14.155, "step": 217000 }, { "epoch": 2.04, "learning_rate": 2.9589347046789663e-05, "loss": 1.4594, "step": 217500 }, { "epoch": 2.04, "eval_loss": 1.2739953994750977, "eval_runtime": 220.7583, "eval_samples_per_second": 452.984, "eval_steps_per_second": 14.156, "step": 217500 }, { "epoch": 2.05, "learning_rate": 2.9542426005517916e-05, "loss": 1.4633, "step": 218000 }, { "epoch": 2.05, "eval_loss": 1.279288411140442, "eval_runtime": 220.7177, "eval_samples_per_second": 453.068, "eval_steps_per_second": 14.158, "step": 218000 }, { "epoch": 2.05, "learning_rate": 2.949550496424617e-05, "loss": 1.4755, "step": 218500 }, { "epoch": 2.05, "eval_loss": 1.2718831300735474, "eval_runtime": 220.8318, "eval_samples_per_second": 452.833, "eval_steps_per_second": 14.151, "step": 218500 }, { "epoch": 2.06, "learning_rate": 2.944858392297442e-05, "loss": 1.4608, "step": 219000 }, { "epoch": 2.06, "eval_loss": 1.2677054405212402, "eval_runtime": 220.8761, "eval_samples_per_second": 452.742, "eval_steps_per_second": 14.148, "step": 219000 }, { "epoch": 2.06, "learning_rate": 2.9401662881702673e-05, "loss": 1.4327, "step": 219500 }, { "epoch": 2.06, "eval_loss": 1.270250678062439, "eval_runtime": 220.795, "eval_samples_per_second": 452.909, "eval_steps_per_second": 14.153, "step": 219500 }, { "epoch": 2.06, "learning_rate": 2.9354741840430923e-05, "loss": 1.4414, "step": 220000 }, { "epoch": 2.06, "eval_loss": 1.2740583419799805, "eval_runtime": 220.7867, "eval_samples_per_second": 452.926, "eval_steps_per_second": 14.154, "step": 220000 }, { "epoch": 2.07, "learning_rate": 2.9307820799159176e-05, "loss": 1.4333, "step": 220500 }, { "epoch": 2.07, "eval_loss": 1.2706983089447021, "eval_runtime": 220.6245, "eval_samples_per_second": 453.259, "eval_steps_per_second": 14.164, "step": 220500 }, { "epoch": 2.07, "learning_rate": 2.926089975788743e-05, "loss": 1.445, "step": 221000 }, { "epoch": 2.07, "eval_loss": 1.2743273973464966, "eval_runtime": 220.6137, "eval_samples_per_second": 453.281, "eval_steps_per_second": 14.165, "step": 221000 }, { "epoch": 2.08, "learning_rate": 2.9213978716615682e-05, "loss": 1.4417, "step": 221500 }, { "epoch": 2.08, "eval_loss": 1.2643128633499146, "eval_runtime": 220.619, "eval_samples_per_second": 453.27, "eval_steps_per_second": 14.165, "step": 221500 }, { "epoch": 2.08, "learning_rate": 2.9167057675343932e-05, "loss": 1.4456, "step": 222000 }, { "epoch": 2.08, "eval_loss": 1.2675349712371826, "eval_runtime": 220.5897, "eval_samples_per_second": 453.33, "eval_steps_per_second": 14.167, "step": 222000 }, { "epoch": 2.09, "learning_rate": 2.9120136634072186e-05, "loss": 1.4515, "step": 222500 }, { "epoch": 2.09, "eval_loss": 1.2693637609481812, "eval_runtime": 220.5837, "eval_samples_per_second": 453.343, "eval_steps_per_second": 14.167, "step": 222500 }, { "epoch": 2.09, "learning_rate": 2.9073215592800436e-05, "loss": 1.4649, "step": 223000 }, { "epoch": 2.09, "eval_loss": 1.269477367401123, "eval_runtime": 220.5619, "eval_samples_per_second": 453.387, "eval_steps_per_second": 14.168, "step": 223000 }, { "epoch": 2.1, "learning_rate": 2.902629455152869e-05, "loss": 1.458, "step": 223500 }, { "epoch": 2.1, "eval_loss": 1.270194411277771, "eval_runtime": 220.6045, "eval_samples_per_second": 453.3, "eval_steps_per_second": 14.166, "step": 223500 }, { "epoch": 2.1, "learning_rate": 2.8979373510256942e-05, "loss": 1.4286, "step": 224000 }, { "epoch": 2.1, "eval_loss": 1.2670238018035889, "eval_runtime": 220.6056, "eval_samples_per_second": 453.298, "eval_steps_per_second": 14.166, "step": 224000 }, { "epoch": 2.11, "learning_rate": 2.8932452468985195e-05, "loss": 1.444, "step": 224500 }, { "epoch": 2.11, "eval_loss": 1.263913869857788, "eval_runtime": 220.6041, "eval_samples_per_second": 453.301, "eval_steps_per_second": 14.166, "step": 224500 }, { "epoch": 2.11, "learning_rate": 2.888553142771345e-05, "loss": 1.4352, "step": 225000 }, { "epoch": 2.11, "eval_loss": 1.2642849683761597, "eval_runtime": 220.5676, "eval_samples_per_second": 453.376, "eval_steps_per_second": 14.168, "step": 225000 }, { "epoch": 2.12, "learning_rate": 2.8838610386441695e-05, "loss": 1.4535, "step": 225500 }, { "epoch": 2.12, "eval_loss": 1.2708691358566284, "eval_runtime": 220.6074, "eval_samples_per_second": 453.294, "eval_steps_per_second": 14.165, "step": 225500 }, { "epoch": 2.12, "learning_rate": 2.879168934516995e-05, "loss": 1.4289, "step": 226000 }, { "epoch": 2.12, "eval_loss": 1.2640670537948608, "eval_runtime": 220.5867, "eval_samples_per_second": 453.336, "eval_steps_per_second": 14.167, "step": 226000 }, { "epoch": 2.13, "learning_rate": 2.8744768303898202e-05, "loss": 1.4569, "step": 226500 }, { "epoch": 2.13, "eval_loss": 1.2591434717178345, "eval_runtime": 220.5876, "eval_samples_per_second": 453.335, "eval_steps_per_second": 14.167, "step": 226500 }, { "epoch": 2.13, "learning_rate": 2.8697847262626455e-05, "loss": 1.4474, "step": 227000 }, { "epoch": 2.13, "eval_loss": 1.2669652700424194, "eval_runtime": 220.5728, "eval_samples_per_second": 453.365, "eval_steps_per_second": 14.168, "step": 227000 }, { "epoch": 2.13, "learning_rate": 2.865092622135471e-05, "loss": 1.4412, "step": 227500 }, { "epoch": 2.13, "eval_loss": 1.2617195844650269, "eval_runtime": 220.5569, "eval_samples_per_second": 453.398, "eval_steps_per_second": 14.169, "step": 227500 }, { "epoch": 2.14, "learning_rate": 2.860400518008296e-05, "loss": 1.4522, "step": 228000 }, { "epoch": 2.14, "eval_loss": 1.262453317642212, "eval_runtime": 220.6524, "eval_samples_per_second": 453.201, "eval_steps_per_second": 14.163, "step": 228000 }, { "epoch": 2.14, "learning_rate": 2.8557084138811208e-05, "loss": 1.4223, "step": 228500 }, { "epoch": 2.14, "eval_loss": 1.2601869106292725, "eval_runtime": 220.7247, "eval_samples_per_second": 453.053, "eval_steps_per_second": 14.158, "step": 228500 }, { "epoch": 2.15, "learning_rate": 2.851016309753946e-05, "loss": 1.4352, "step": 229000 }, { "epoch": 2.15, "eval_loss": 1.2503169775009155, "eval_runtime": 220.6544, "eval_samples_per_second": 453.197, "eval_steps_per_second": 14.162, "step": 229000 }, { "epoch": 2.15, "learning_rate": 2.8463242056267715e-05, "loss": 1.4153, "step": 229500 }, { "epoch": 2.15, "eval_loss": 1.2586244344711304, "eval_runtime": 220.6119, "eval_samples_per_second": 453.285, "eval_steps_per_second": 14.165, "step": 229500 }, { "epoch": 2.16, "learning_rate": 2.8416321014995968e-05, "loss": 1.4255, "step": 230000 }, { "epoch": 2.16, "eval_loss": 1.2628742456436157, "eval_runtime": 220.5148, "eval_samples_per_second": 453.484, "eval_steps_per_second": 14.171, "step": 230000 }, { "epoch": 2.16, "learning_rate": 2.836939997372422e-05, "loss": 1.4402, "step": 230500 }, { "epoch": 2.16, "eval_loss": 1.2623021602630615, "eval_runtime": 220.3405, "eval_samples_per_second": 453.843, "eval_steps_per_second": 14.183, "step": 230500 }, { "epoch": 2.17, "learning_rate": 2.8322478932452475e-05, "loss": 1.4327, "step": 231000 }, { "epoch": 2.17, "eval_loss": 1.2573188543319702, "eval_runtime": 220.4056, "eval_samples_per_second": 453.709, "eval_steps_per_second": 14.178, "step": 231000 }, { "epoch": 2.17, "learning_rate": 2.827555789118072e-05, "loss": 1.4522, "step": 231500 }, { "epoch": 2.17, "eval_loss": 1.262677550315857, "eval_runtime": 220.4232, "eval_samples_per_second": 453.673, "eval_steps_per_second": 14.177, "step": 231500 }, { "epoch": 2.18, "learning_rate": 2.8228636849908974e-05, "loss": 1.4528, "step": 232000 }, { "epoch": 2.18, "eval_loss": 1.259798526763916, "eval_runtime": 220.4521, "eval_samples_per_second": 453.613, "eval_steps_per_second": 14.175, "step": 232000 }, { "epoch": 2.18, "learning_rate": 2.8181715808637228e-05, "loss": 1.4322, "step": 232500 }, { "epoch": 2.18, "eval_loss": 1.2561190128326416, "eval_runtime": 220.4464, "eval_samples_per_second": 453.625, "eval_steps_per_second": 14.176, "step": 232500 }, { "epoch": 2.19, "learning_rate": 2.813479476736548e-05, "loss": 1.4285, "step": 233000 }, { "epoch": 2.19, "eval_loss": 1.262929916381836, "eval_runtime": 220.4749, "eval_samples_per_second": 453.566, "eval_steps_per_second": 14.174, "step": 233000 }, { "epoch": 2.19, "learning_rate": 2.8087873726093734e-05, "loss": 1.4357, "step": 233500 }, { "epoch": 2.19, "eval_loss": 1.2526911497116089, "eval_runtime": 220.5247, "eval_samples_per_second": 453.464, "eval_steps_per_second": 14.171, "step": 233500 }, { "epoch": 2.2, "learning_rate": 2.804095268482198e-05, "loss": 1.4242, "step": 234000 }, { "epoch": 2.2, "eval_loss": 1.2577769756317139, "eval_runtime": 220.5683, "eval_samples_per_second": 453.374, "eval_steps_per_second": 14.168, "step": 234000 }, { "epoch": 2.2, "learning_rate": 2.7994031643550234e-05, "loss": 1.4162, "step": 234500 }, { "epoch": 2.2, "eval_loss": 1.2519463300704956, "eval_runtime": 220.6043, "eval_samples_per_second": 453.3, "eval_steps_per_second": 14.166, "step": 234500 }, { "epoch": 2.21, "learning_rate": 2.7947110602278487e-05, "loss": 1.4332, "step": 235000 }, { "epoch": 2.21, "eval_loss": 1.2554024457931519, "eval_runtime": 220.6422, "eval_samples_per_second": 453.222, "eval_steps_per_second": 14.163, "step": 235000 }, { "epoch": 2.21, "learning_rate": 2.790018956100674e-05, "loss": 1.4313, "step": 235500 }, { "epoch": 2.21, "eval_loss": 1.2516902685165405, "eval_runtime": 220.661, "eval_samples_per_second": 453.184, "eval_steps_per_second": 14.162, "step": 235500 }, { "epoch": 2.21, "learning_rate": 2.7853268519734994e-05, "loss": 1.4295, "step": 236000 }, { "epoch": 2.21, "eval_loss": 1.246413230895996, "eval_runtime": 220.6917, "eval_samples_per_second": 453.121, "eval_steps_per_second": 14.16, "step": 236000 }, { "epoch": 2.22, "learning_rate": 2.7806347478463247e-05, "loss": 1.4206, "step": 236500 }, { "epoch": 2.22, "eval_loss": 1.2490031719207764, "eval_runtime": 220.6934, "eval_samples_per_second": 453.117, "eval_steps_per_second": 14.16, "step": 236500 }, { "epoch": 2.22, "learning_rate": 2.7759426437191494e-05, "loss": 1.4304, "step": 237000 }, { "epoch": 2.22, "eval_loss": 1.2488088607788086, "eval_runtime": 220.7139, "eval_samples_per_second": 453.075, "eval_steps_per_second": 14.159, "step": 237000 }, { "epoch": 2.23, "learning_rate": 2.7712505395919747e-05, "loss": 1.4205, "step": 237500 }, { "epoch": 2.23, "eval_loss": 1.2458001375198364, "eval_runtime": 220.8036, "eval_samples_per_second": 452.891, "eval_steps_per_second": 14.153, "step": 237500 }, { "epoch": 2.23, "learning_rate": 2.7665584354648e-05, "loss": 1.4287, "step": 238000 }, { "epoch": 2.23, "eval_loss": 1.2452707290649414, "eval_runtime": 220.9497, "eval_samples_per_second": 452.592, "eval_steps_per_second": 14.143, "step": 238000 }, { "epoch": 2.24, "learning_rate": 2.7618663313376253e-05, "loss": 1.4384, "step": 238500 }, { "epoch": 2.24, "eval_loss": 1.2455310821533203, "eval_runtime": 220.9115, "eval_samples_per_second": 452.67, "eval_steps_per_second": 14.146, "step": 238500 }, { "epoch": 2.24, "learning_rate": 2.7571742272104507e-05, "loss": 1.4297, "step": 239000 }, { "epoch": 2.24, "eval_loss": 1.2461364269256592, "eval_runtime": 220.8712, "eval_samples_per_second": 452.753, "eval_steps_per_second": 14.149, "step": 239000 }, { "epoch": 2.25, "learning_rate": 2.7524821230832753e-05, "loss": 1.4192, "step": 239500 }, { "epoch": 2.25, "eval_loss": 1.241681456565857, "eval_runtime": 220.7997, "eval_samples_per_second": 452.899, "eval_steps_per_second": 14.153, "step": 239500 }, { "epoch": 2.25, "learning_rate": 2.7477900189561007e-05, "loss": 1.4371, "step": 240000 }, { "epoch": 2.25, "eval_loss": 1.241499662399292, "eval_runtime": 220.8006, "eval_samples_per_second": 452.897, "eval_steps_per_second": 14.153, "step": 240000 }, { "epoch": 2.26, "learning_rate": 2.743097914828926e-05, "loss": 1.4134, "step": 240500 }, { "epoch": 2.26, "eval_loss": 1.2505980730056763, "eval_runtime": 220.9405, "eval_samples_per_second": 452.611, "eval_steps_per_second": 14.144, "step": 240500 }, { "epoch": 2.26, "learning_rate": 2.7384058107017513e-05, "loss": 1.4282, "step": 241000 }, { "epoch": 2.26, "eval_loss": 1.2383712530136108, "eval_runtime": 220.9785, "eval_samples_per_second": 452.533, "eval_steps_per_second": 14.142, "step": 241000 }, { "epoch": 2.27, "learning_rate": 2.7337137065745766e-05, "loss": 1.423, "step": 241500 }, { "epoch": 2.27, "eval_loss": 1.2391642332077026, "eval_runtime": 221.0121, "eval_samples_per_second": 452.464, "eval_steps_per_second": 14.14, "step": 241500 }, { "epoch": 2.27, "learning_rate": 2.729021602447402e-05, "loss": 1.3965, "step": 242000 }, { "epoch": 2.27, "eval_loss": 1.234950304031372, "eval_runtime": 221.0193, "eval_samples_per_second": 452.449, "eval_steps_per_second": 14.139, "step": 242000 }, { "epoch": 2.28, "learning_rate": 2.7243294983202266e-05, "loss": 1.4213, "step": 242500 }, { "epoch": 2.28, "eval_loss": 1.2397407293319702, "eval_runtime": 221.049, "eval_samples_per_second": 452.388, "eval_steps_per_second": 14.137, "step": 242500 }, { "epoch": 2.28, "learning_rate": 2.719637394193052e-05, "loss": 1.4205, "step": 243000 }, { "epoch": 2.28, "eval_loss": 1.239810824394226, "eval_runtime": 221.0771, "eval_samples_per_second": 452.331, "eval_steps_per_second": 14.135, "step": 243000 }, { "epoch": 2.29, "learning_rate": 2.7149452900658773e-05, "loss": 1.4043, "step": 243500 }, { "epoch": 2.29, "eval_loss": 1.234268307685852, "eval_runtime": 221.0918, "eval_samples_per_second": 452.301, "eval_steps_per_second": 14.134, "step": 243500 }, { "epoch": 2.29, "learning_rate": 2.7102531859387026e-05, "loss": 1.4253, "step": 244000 }, { "epoch": 2.29, "eval_loss": 1.2382601499557495, "eval_runtime": 221.0904, "eval_samples_per_second": 452.304, "eval_steps_per_second": 14.134, "step": 244000 }, { "epoch": 2.29, "learning_rate": 2.705561081811528e-05, "loss": 1.417, "step": 244500 }, { "epoch": 2.29, "eval_loss": 1.2414758205413818, "eval_runtime": 221.0991, "eval_samples_per_second": 452.286, "eval_steps_per_second": 14.134, "step": 244500 }, { "epoch": 2.3, "learning_rate": 2.7008689776843526e-05, "loss": 1.4246, "step": 245000 }, { "epoch": 2.3, "eval_loss": 1.2350417375564575, "eval_runtime": 221.1225, "eval_samples_per_second": 452.238, "eval_steps_per_second": 14.132, "step": 245000 }, { "epoch": 2.3, "learning_rate": 2.696176873557178e-05, "loss": 1.4098, "step": 245500 }, { "epoch": 2.3, "eval_loss": 1.231085181236267, "eval_runtime": 221.136, "eval_samples_per_second": 452.21, "eval_steps_per_second": 14.132, "step": 245500 }, { "epoch": 2.31, "learning_rate": 2.6914847694300032e-05, "loss": 1.4127, "step": 246000 }, { "epoch": 2.31, "eval_loss": 1.2361029386520386, "eval_runtime": 221.7853, "eval_samples_per_second": 450.887, "eval_steps_per_second": 14.09, "step": 246000 }, { "epoch": 2.31, "learning_rate": 2.6867926653028286e-05, "loss": 1.3934, "step": 246500 }, { "epoch": 2.31, "eval_loss": 1.2260022163391113, "eval_runtime": 221.7286, "eval_samples_per_second": 451.002, "eval_steps_per_second": 14.094, "step": 246500 }, { "epoch": 2.32, "learning_rate": 2.682100561175654e-05, "loss": 1.4177, "step": 247000 }, { "epoch": 2.32, "eval_loss": 1.2279936075210571, "eval_runtime": 221.7401, "eval_samples_per_second": 450.979, "eval_steps_per_second": 14.093, "step": 247000 }, { "epoch": 2.32, "learning_rate": 2.6774084570484792e-05, "loss": 1.393, "step": 247500 }, { "epoch": 2.32, "eval_loss": 1.230800986289978, "eval_runtime": 221.7152, "eval_samples_per_second": 451.029, "eval_steps_per_second": 14.095, "step": 247500 }, { "epoch": 2.33, "learning_rate": 2.672716352921304e-05, "loss": 1.4007, "step": 248000 }, { "epoch": 2.33, "eval_loss": 1.2362948656082153, "eval_runtime": 221.5844, "eval_samples_per_second": 451.295, "eval_steps_per_second": 14.103, "step": 248000 }, { "epoch": 2.33, "learning_rate": 2.6680242487941292e-05, "loss": 1.4119, "step": 248500 }, { "epoch": 2.33, "eval_loss": 1.2321358919143677, "eval_runtime": 221.5867, "eval_samples_per_second": 451.291, "eval_steps_per_second": 14.103, "step": 248500 }, { "epoch": 2.34, "learning_rate": 2.6633321446669545e-05, "loss": 1.4134, "step": 249000 }, { "epoch": 2.34, "eval_loss": 1.2312347888946533, "eval_runtime": 221.6239, "eval_samples_per_second": 451.215, "eval_steps_per_second": 14.1, "step": 249000 }, { "epoch": 2.34, "learning_rate": 2.65864004053978e-05, "loss": 1.3995, "step": 249500 }, { "epoch": 2.34, "eval_loss": 1.227805495262146, "eval_runtime": 221.6235, "eval_samples_per_second": 451.216, "eval_steps_per_second": 14.1, "step": 249500 }, { "epoch": 2.35, "learning_rate": 2.6539479364126052e-05, "loss": 1.4042, "step": 250000 }, { "epoch": 2.35, "eval_loss": 1.2257417440414429, "eval_runtime": 221.6629, "eval_samples_per_second": 451.136, "eval_steps_per_second": 14.098, "step": 250000 }, { "epoch": 2.35, "learning_rate": 2.64925583228543e-05, "loss": 1.4041, "step": 250500 }, { "epoch": 2.35, "eval_loss": 1.2193177938461304, "eval_runtime": 221.6777, "eval_samples_per_second": 451.105, "eval_steps_per_second": 14.097, "step": 250500 }, { "epoch": 2.36, "learning_rate": 2.6445637281582552e-05, "loss": 1.4014, "step": 251000 }, { "epoch": 2.36, "eval_loss": 1.2312824726104736, "eval_runtime": 221.7704, "eval_samples_per_second": 450.917, "eval_steps_per_second": 14.091, "step": 251000 }, { "epoch": 2.36, "learning_rate": 2.6398716240310805e-05, "loss": 1.4122, "step": 251500 }, { "epoch": 2.36, "eval_loss": 1.2306718826293945, "eval_runtime": 221.7735, "eval_samples_per_second": 450.911, "eval_steps_per_second": 14.091, "step": 251500 }, { "epoch": 2.36, "learning_rate": 2.6351795199039058e-05, "loss": 1.3839, "step": 252000 }, { "epoch": 2.36, "eval_loss": 1.2296006679534912, "eval_runtime": 221.8207, "eval_samples_per_second": 450.815, "eval_steps_per_second": 14.088, "step": 252000 }, { "epoch": 2.37, "learning_rate": 2.630487415776731e-05, "loss": 1.4113, "step": 252500 }, { "epoch": 2.37, "eval_loss": 1.2209049463272095, "eval_runtime": 221.8254, "eval_samples_per_second": 450.805, "eval_steps_per_second": 14.088, "step": 252500 }, { "epoch": 2.37, "learning_rate": 2.6257953116495565e-05, "loss": 1.3818, "step": 253000 }, { "epoch": 2.37, "eval_loss": 1.2337490320205688, "eval_runtime": 221.8579, "eval_samples_per_second": 450.739, "eval_steps_per_second": 14.086, "step": 253000 }, { "epoch": 2.38, "learning_rate": 2.621103207522381e-05, "loss": 1.4197, "step": 253500 }, { "epoch": 2.38, "eval_loss": 1.2286659479141235, "eval_runtime": 222.042, "eval_samples_per_second": 450.365, "eval_steps_per_second": 14.074, "step": 253500 }, { "epoch": 2.38, "learning_rate": 2.6164111033952065e-05, "loss": 1.407, "step": 254000 }, { "epoch": 2.38, "eval_loss": 1.2235952615737915, "eval_runtime": 222.0342, "eval_samples_per_second": 450.381, "eval_steps_per_second": 14.074, "step": 254000 }, { "epoch": 2.39, "learning_rate": 2.6117189992680318e-05, "loss": 1.4031, "step": 254500 }, { "epoch": 2.39, "eval_loss": 1.2171984910964966, "eval_runtime": 222.0173, "eval_samples_per_second": 450.415, "eval_steps_per_second": 14.075, "step": 254500 }, { "epoch": 2.39, "learning_rate": 2.607026895140857e-05, "loss": 1.39, "step": 255000 }, { "epoch": 2.39, "eval_loss": 1.2256048917770386, "eval_runtime": 221.9334, "eval_samples_per_second": 450.586, "eval_steps_per_second": 14.081, "step": 255000 }, { "epoch": 2.4, "learning_rate": 2.6023347910136824e-05, "loss": 1.3946, "step": 255500 }, { "epoch": 2.4, "eval_loss": 1.2248618602752686, "eval_runtime": 221.838, "eval_samples_per_second": 450.779, "eval_steps_per_second": 14.087, "step": 255500 }, { "epoch": 2.4, "learning_rate": 2.597642686886507e-05, "loss": 1.417, "step": 256000 }, { "epoch": 2.4, "eval_loss": 1.2168443202972412, "eval_runtime": 221.8642, "eval_samples_per_second": 450.726, "eval_steps_per_second": 14.085, "step": 256000 }, { "epoch": 2.41, "learning_rate": 2.5929505827593324e-05, "loss": 1.4065, "step": 256500 }, { "epoch": 2.41, "eval_loss": 1.2248560190200806, "eval_runtime": 221.8502, "eval_samples_per_second": 450.755, "eval_steps_per_second": 14.086, "step": 256500 }, { "epoch": 2.41, "learning_rate": 2.5882584786321578e-05, "loss": 1.4046, "step": 257000 }, { "epoch": 2.41, "eval_loss": 1.2218291759490967, "eval_runtime": 221.8436, "eval_samples_per_second": 450.768, "eval_steps_per_second": 14.086, "step": 257000 }, { "epoch": 2.42, "learning_rate": 2.583566374504983e-05, "loss": 1.4101, "step": 257500 }, { "epoch": 2.42, "eval_loss": 1.2202662229537964, "eval_runtime": 221.8576, "eval_samples_per_second": 450.74, "eval_steps_per_second": 14.086, "step": 257500 }, { "epoch": 2.42, "learning_rate": 2.5788742703778084e-05, "loss": 1.3997, "step": 258000 }, { "epoch": 2.42, "eval_loss": 1.2214444875717163, "eval_runtime": 221.8565, "eval_samples_per_second": 450.742, "eval_steps_per_second": 14.086, "step": 258000 }, { "epoch": 2.43, "learning_rate": 2.5741821662506337e-05, "loss": 1.4134, "step": 258500 }, { "epoch": 2.43, "eval_loss": 1.220430612564087, "eval_runtime": 221.8826, "eval_samples_per_second": 450.689, "eval_steps_per_second": 14.084, "step": 258500 }, { "epoch": 2.43, "learning_rate": 2.5694900621234584e-05, "loss": 1.3823, "step": 259000 }, { "epoch": 2.43, "eval_loss": 1.2182176113128662, "eval_runtime": 221.9231, "eval_samples_per_second": 450.606, "eval_steps_per_second": 14.081, "step": 259000 }, { "epoch": 2.44, "learning_rate": 2.5647979579962837e-05, "loss": 1.3854, "step": 259500 }, { "epoch": 2.44, "eval_loss": 1.2200777530670166, "eval_runtime": 221.9107, "eval_samples_per_second": 450.632, "eval_steps_per_second": 14.082, "step": 259500 }, { "epoch": 2.44, "learning_rate": 2.560105853869109e-05, "loss": 1.4126, "step": 260000 }, { "epoch": 2.44, "eval_loss": 1.2206027507781982, "eval_runtime": 221.8721, "eval_samples_per_second": 450.71, "eval_steps_per_second": 14.085, "step": 260000 }, { "epoch": 2.44, "learning_rate": 2.5554137497419344e-05, "loss": 1.3986, "step": 260500 }, { "epoch": 2.44, "eval_loss": 1.2171374559402466, "eval_runtime": 222.0524, "eval_samples_per_second": 450.344, "eval_steps_per_second": 14.073, "step": 260500 }, { "epoch": 2.45, "learning_rate": 2.5507216456147597e-05, "loss": 1.401, "step": 261000 }, { "epoch": 2.45, "eval_loss": 1.220663070678711, "eval_runtime": 222.0724, "eval_samples_per_second": 450.304, "eval_steps_per_second": 14.072, "step": 261000 }, { "epoch": 2.45, "learning_rate": 2.5460295414875847e-05, "loss": 1.3983, "step": 261500 }, { "epoch": 2.45, "eval_loss": 1.2131149768829346, "eval_runtime": 222.0269, "eval_samples_per_second": 450.396, "eval_steps_per_second": 14.075, "step": 261500 }, { "epoch": 2.46, "learning_rate": 2.5413374373604097e-05, "loss": 1.3837, "step": 262000 }, { "epoch": 2.46, "eval_loss": 1.2130707502365112, "eval_runtime": 222.0086, "eval_samples_per_second": 450.433, "eval_steps_per_second": 14.076, "step": 262000 }, { "epoch": 2.46, "learning_rate": 2.536645333233235e-05, "loss": 1.3935, "step": 262500 }, { "epoch": 2.46, "eval_loss": 1.2063870429992676, "eval_runtime": 221.8375, "eval_samples_per_second": 450.78, "eval_steps_per_second": 14.087, "step": 262500 }, { "epoch": 2.47, "learning_rate": 2.5319532291060603e-05, "loss": 1.4004, "step": 263000 }, { "epoch": 2.47, "eval_loss": 1.21162748336792, "eval_runtime": 221.8742, "eval_samples_per_second": 450.706, "eval_steps_per_second": 14.085, "step": 263000 }, { "epoch": 2.47, "learning_rate": 2.5272611249788857e-05, "loss": 1.3934, "step": 263500 }, { "epoch": 2.47, "eval_loss": 1.2112177610397339, "eval_runtime": 221.8733, "eval_samples_per_second": 450.708, "eval_steps_per_second": 14.085, "step": 263500 }, { "epoch": 2.48, "learning_rate": 2.522569020851711e-05, "loss": 1.3747, "step": 264000 }, { "epoch": 2.48, "eval_loss": 1.217407464981079, "eval_runtime": 221.8721, "eval_samples_per_second": 450.71, "eval_steps_per_second": 14.085, "step": 264000 }, { "epoch": 2.48, "learning_rate": 2.517876916724536e-05, "loss": 1.3891, "step": 264500 }, { "epoch": 2.48, "eval_loss": 1.215196132659912, "eval_runtime": 221.9107, "eval_samples_per_second": 450.632, "eval_steps_per_second": 14.082, "step": 264500 }, { "epoch": 2.49, "learning_rate": 2.5131848125973613e-05, "loss": 1.3753, "step": 265000 }, { "epoch": 2.49, "eval_loss": 1.2109936475753784, "eval_runtime": 221.8926, "eval_samples_per_second": 450.668, "eval_steps_per_second": 14.083, "step": 265000 }, { "epoch": 2.49, "learning_rate": 2.5084927084701863e-05, "loss": 1.4029, "step": 265500 }, { "epoch": 2.49, "eval_loss": 1.2136719226837158, "eval_runtime": 221.9088, "eval_samples_per_second": 450.636, "eval_steps_per_second": 14.082, "step": 265500 }, { "epoch": 2.5, "learning_rate": 2.5038006043430116e-05, "loss": 1.3716, "step": 266000 }, { "epoch": 2.5, "eval_loss": 1.2064990997314453, "eval_runtime": 221.917, "eval_samples_per_second": 450.619, "eval_steps_per_second": 14.082, "step": 266000 }, { "epoch": 2.5, "learning_rate": 2.499108500215837e-05, "loss": 1.3917, "step": 266500 }, { "epoch": 2.5, "eval_loss": 1.2140556573867798, "eval_runtime": 221.8926, "eval_samples_per_second": 450.668, "eval_steps_per_second": 14.083, "step": 266500 }, { "epoch": 2.51, "learning_rate": 2.494416396088662e-05, "loss": 1.3958, "step": 267000 }, { "epoch": 2.51, "eval_loss": 1.2115426063537598, "eval_runtime": 221.881, "eval_samples_per_second": 450.692, "eval_steps_per_second": 14.084, "step": 267000 }, { "epoch": 2.51, "learning_rate": 2.4897242919614873e-05, "loss": 1.388, "step": 267500 }, { "epoch": 2.51, "eval_loss": 1.2005212306976318, "eval_runtime": 222.0705, "eval_samples_per_second": 450.307, "eval_steps_per_second": 14.072, "step": 267500 }, { "epoch": 2.51, "learning_rate": 2.4850321878343126e-05, "loss": 1.3769, "step": 268000 }, { "epoch": 2.51, "eval_loss": 1.2068332433700562, "eval_runtime": 222.0339, "eval_samples_per_second": 450.382, "eval_steps_per_second": 14.074, "step": 268000 }, { "epoch": 2.52, "learning_rate": 2.480340083707138e-05, "loss": 1.4106, "step": 268500 }, { "epoch": 2.52, "eval_loss": 1.2093212604522705, "eval_runtime": 222.0101, "eval_samples_per_second": 450.43, "eval_steps_per_second": 14.076, "step": 268500 }, { "epoch": 2.52, "learning_rate": 2.475647979579963e-05, "loss": 1.3883, "step": 269000 }, { "epoch": 2.52, "eval_loss": 1.2118886709213257, "eval_runtime": 221.9501, "eval_samples_per_second": 450.552, "eval_steps_per_second": 14.08, "step": 269000 }, { "epoch": 2.53, "learning_rate": 2.4709558754527883e-05, "loss": 1.3742, "step": 269500 }, { "epoch": 2.53, "eval_loss": 1.2088725566864014, "eval_runtime": 221.8528, "eval_samples_per_second": 450.749, "eval_steps_per_second": 14.086, "step": 269500 }, { "epoch": 2.53, "learning_rate": 2.4662637713256136e-05, "loss": 1.3922, "step": 270000 }, { "epoch": 2.53, "eval_loss": 1.2059705257415771, "eval_runtime": 221.9028, "eval_samples_per_second": 450.648, "eval_steps_per_second": 14.083, "step": 270000 }, { "epoch": 2.54, "learning_rate": 2.4615716671984386e-05, "loss": 1.3924, "step": 270500 }, { "epoch": 2.54, "eval_loss": 1.2069438695907593, "eval_runtime": 221.8382, "eval_samples_per_second": 450.779, "eval_steps_per_second": 14.087, "step": 270500 }, { "epoch": 2.54, "learning_rate": 2.456879563071264e-05, "loss": 1.4043, "step": 271000 }, { "epoch": 2.54, "eval_loss": 1.2041146755218506, "eval_runtime": 221.8591, "eval_samples_per_second": 450.736, "eval_steps_per_second": 14.086, "step": 271000 }, { "epoch": 2.55, "learning_rate": 2.4521874589440892e-05, "loss": 1.3877, "step": 271500 }, { "epoch": 2.55, "eval_loss": 1.209468960762024, "eval_runtime": 221.837, "eval_samples_per_second": 450.781, "eval_steps_per_second": 14.087, "step": 271500 }, { "epoch": 2.55, "learning_rate": 2.4474953548169142e-05, "loss": 1.3866, "step": 272000 }, { "epoch": 2.55, "eval_loss": 1.2078853845596313, "eval_runtime": 221.8543, "eval_samples_per_second": 450.746, "eval_steps_per_second": 14.086, "step": 272000 }, { "epoch": 2.56, "learning_rate": 2.4428032506897396e-05, "loss": 1.3892, "step": 272500 }, { "epoch": 2.56, "eval_loss": 1.203126311302185, "eval_runtime": 221.8674, "eval_samples_per_second": 450.72, "eval_steps_per_second": 14.085, "step": 272500 }, { "epoch": 2.56, "learning_rate": 2.438111146562565e-05, "loss": 1.3876, "step": 273000 }, { "epoch": 2.56, "eval_loss": 1.208136796951294, "eval_runtime": 221.8861, "eval_samples_per_second": 450.682, "eval_steps_per_second": 14.084, "step": 273000 }, { "epoch": 2.57, "learning_rate": 2.43341904243539e-05, "loss": 1.3791, "step": 273500 }, { "epoch": 2.57, "eval_loss": 1.2000367641448975, "eval_runtime": 221.8811, "eval_samples_per_second": 450.692, "eval_steps_per_second": 14.084, "step": 273500 }, { "epoch": 2.57, "learning_rate": 2.4287269383082152e-05, "loss": 1.3721, "step": 274000 }, { "epoch": 2.57, "eval_loss": 1.1960883140563965, "eval_runtime": 221.8902, "eval_samples_per_second": 450.673, "eval_steps_per_second": 14.084, "step": 274000 }, { "epoch": 2.58, "learning_rate": 2.4240348341810405e-05, "loss": 1.3758, "step": 274500 }, { "epoch": 2.58, "eval_loss": 1.1972496509552002, "eval_runtime": 222.0858, "eval_samples_per_second": 450.276, "eval_steps_per_second": 14.071, "step": 274500 }, { "epoch": 2.58, "learning_rate": 2.4193427300538655e-05, "loss": 1.3854, "step": 275000 }, { "epoch": 2.58, "eval_loss": 1.2011773586273193, "eval_runtime": 222.0189, "eval_samples_per_second": 450.412, "eval_steps_per_second": 14.075, "step": 275000 }, { "epoch": 2.59, "learning_rate": 2.414650625926691e-05, "loss": 1.3815, "step": 275500 }, { "epoch": 2.59, "eval_loss": 1.200337290763855, "eval_runtime": 222.0085, "eval_samples_per_second": 450.433, "eval_steps_per_second": 14.076, "step": 275500 }, { "epoch": 2.59, "learning_rate": 2.409958521799516e-05, "loss": 1.37, "step": 276000 }, { "epoch": 2.59, "eval_loss": 1.203967809677124, "eval_runtime": 221.9584, "eval_samples_per_second": 450.535, "eval_steps_per_second": 14.079, "step": 276000 }, { "epoch": 2.59, "learning_rate": 2.405266417672341e-05, "loss": 1.3913, "step": 276500 }, { "epoch": 2.59, "eval_loss": 1.193058729171753, "eval_runtime": 221.8004, "eval_samples_per_second": 450.856, "eval_steps_per_second": 14.089, "step": 276500 }, { "epoch": 2.6, "learning_rate": 2.4005743135451665e-05, "loss": 1.3707, "step": 277000 }, { "epoch": 2.6, "eval_loss": 1.2021199464797974, "eval_runtime": 221.8184, "eval_samples_per_second": 450.819, "eval_steps_per_second": 14.088, "step": 277000 }, { "epoch": 2.6, "learning_rate": 2.3958822094179915e-05, "loss": 1.3903, "step": 277500 }, { "epoch": 2.6, "eval_loss": 1.1956475973129272, "eval_runtime": 221.7913, "eval_samples_per_second": 450.874, "eval_steps_per_second": 14.09, "step": 277500 }, { "epoch": 2.61, "learning_rate": 2.3911901052908168e-05, "loss": 1.3792, "step": 278000 }, { "epoch": 2.61, "eval_loss": 1.1976265907287598, "eval_runtime": 221.8164, "eval_samples_per_second": 450.823, "eval_steps_per_second": 14.088, "step": 278000 }, { "epoch": 2.61, "learning_rate": 2.386498001163642e-05, "loss": 1.3765, "step": 278500 }, { "epoch": 2.61, "eval_loss": 1.2006570100784302, "eval_runtime": 221.8144, "eval_samples_per_second": 450.827, "eval_steps_per_second": 14.088, "step": 278500 }, { "epoch": 2.62, "learning_rate": 2.381805897036467e-05, "loss": 1.3589, "step": 279000 }, { "epoch": 2.62, "eval_loss": 1.1913626194000244, "eval_runtime": 221.8059, "eval_samples_per_second": 450.845, "eval_steps_per_second": 14.089, "step": 279000 }, { "epoch": 2.62, "learning_rate": 2.3771137929092925e-05, "loss": 1.3887, "step": 279500 }, { "epoch": 2.62, "eval_loss": 1.1945520639419556, "eval_runtime": 221.8425, "eval_samples_per_second": 450.77, "eval_steps_per_second": 14.087, "step": 279500 }, { "epoch": 2.63, "learning_rate": 2.3724216887821178e-05, "loss": 1.3897, "step": 280000 }, { "epoch": 2.63, "eval_loss": 1.1894328594207764, "eval_runtime": 221.8345, "eval_samples_per_second": 450.786, "eval_steps_per_second": 14.087, "step": 280000 }, { "epoch": 2.63, "learning_rate": 2.3677295846549428e-05, "loss": 1.3875, "step": 280500 }, { "epoch": 2.63, "eval_loss": 1.190177083015442, "eval_runtime": 221.8514, "eval_samples_per_second": 450.752, "eval_steps_per_second": 14.086, "step": 280500 }, { "epoch": 2.64, "learning_rate": 2.363037480527768e-05, "loss": 1.3796, "step": 281000 }, { "epoch": 2.64, "eval_loss": 1.1883279085159302, "eval_runtime": 221.8103, "eval_samples_per_second": 450.836, "eval_steps_per_second": 14.089, "step": 281000 }, { "epoch": 2.64, "learning_rate": 2.3583453764005934e-05, "loss": 1.3563, "step": 281500 }, { "epoch": 2.64, "eval_loss": 1.1856937408447266, "eval_runtime": 222.0332, "eval_samples_per_second": 450.383, "eval_steps_per_second": 14.074, "step": 281500 }, { "epoch": 2.65, "learning_rate": 2.3536532722734184e-05, "loss": 1.3546, "step": 282000 }, { "epoch": 2.65, "eval_loss": 1.1944658756256104, "eval_runtime": 221.9876, "eval_samples_per_second": 450.476, "eval_steps_per_second": 14.077, "step": 282000 }, { "epoch": 2.65, "learning_rate": 2.3489611681462438e-05, "loss": 1.3597, "step": 282500 }, { "epoch": 2.65, "eval_loss": 1.1892412900924683, "eval_runtime": 221.924, "eval_samples_per_second": 450.605, "eval_steps_per_second": 14.081, "step": 282500 }, { "epoch": 2.66, "learning_rate": 2.3442690640190687e-05, "loss": 1.368, "step": 283000 }, { "epoch": 2.66, "eval_loss": 1.1899220943450928, "eval_runtime": 221.8992, "eval_samples_per_second": 450.655, "eval_steps_per_second": 14.083, "step": 283000 }, { "epoch": 2.66, "learning_rate": 2.339576959891894e-05, "loss": 1.3766, "step": 283500 }, { "epoch": 2.66, "eval_loss": 1.1951682567596436, "eval_runtime": 221.7577, "eval_samples_per_second": 450.943, "eval_steps_per_second": 14.092, "step": 283500 }, { "epoch": 2.67, "learning_rate": 2.3348848557647194e-05, "loss": 1.369, "step": 284000 }, { "epoch": 2.67, "eval_loss": 1.1905887126922607, "eval_runtime": 221.7249, "eval_samples_per_second": 451.009, "eval_steps_per_second": 14.094, "step": 284000 }, { "epoch": 2.67, "learning_rate": 2.3301927516375444e-05, "loss": 1.3634, "step": 284500 }, { "epoch": 2.67, "eval_loss": 1.1932637691497803, "eval_runtime": 221.7372, "eval_samples_per_second": 450.984, "eval_steps_per_second": 14.093, "step": 284500 }, { "epoch": 2.67, "learning_rate": 2.3255006475103697e-05, "loss": 1.3571, "step": 285000 }, { "epoch": 2.67, "eval_loss": 1.1916605234146118, "eval_runtime": 221.7465, "eval_samples_per_second": 450.965, "eval_steps_per_second": 14.093, "step": 285000 }, { "epoch": 2.68, "learning_rate": 2.320808543383195e-05, "loss": 1.3663, "step": 285500 }, { "epoch": 2.68, "eval_loss": 1.1863957643508911, "eval_runtime": 221.7586, "eval_samples_per_second": 450.941, "eval_steps_per_second": 14.092, "step": 285500 }, { "epoch": 2.68, "learning_rate": 2.31611643925602e-05, "loss": 1.3437, "step": 286000 }, { "epoch": 2.68, "eval_loss": 1.186560034751892, "eval_runtime": 221.7879, "eval_samples_per_second": 450.881, "eval_steps_per_second": 14.09, "step": 286000 }, { "epoch": 2.69, "learning_rate": 2.3114243351288454e-05, "loss": 1.3614, "step": 286500 }, { "epoch": 2.69, "eval_loss": 1.1834124326705933, "eval_runtime": 221.7759, "eval_samples_per_second": 450.906, "eval_steps_per_second": 14.091, "step": 286500 }, { "epoch": 2.69, "learning_rate": 2.3067322310016707e-05, "loss": 1.3561, "step": 287000 }, { "epoch": 2.69, "eval_loss": 1.1831755638122559, "eval_runtime": 221.7594, "eval_samples_per_second": 450.939, "eval_steps_per_second": 14.092, "step": 287000 }, { "epoch": 2.7, "learning_rate": 2.3020401268744957e-05, "loss": 1.3634, "step": 287500 }, { "epoch": 2.7, "eval_loss": 1.1881248950958252, "eval_runtime": 221.7452, "eval_samples_per_second": 450.968, "eval_steps_per_second": 14.093, "step": 287500 }, { "epoch": 2.7, "learning_rate": 2.297348022747321e-05, "loss": 1.3574, "step": 288000 }, { "epoch": 2.7, "eval_loss": 1.1852173805236816, "eval_runtime": 221.7114, "eval_samples_per_second": 451.037, "eval_steps_per_second": 14.095, "step": 288000 }, { "epoch": 2.71, "learning_rate": 2.292655918620146e-05, "loss": 1.377, "step": 288500 }, { "epoch": 2.71, "eval_loss": 1.1913198232650757, "eval_runtime": 221.8189, "eval_samples_per_second": 450.818, "eval_steps_per_second": 14.088, "step": 288500 }, { "epoch": 2.71, "learning_rate": 2.2879638144929713e-05, "loss": 1.3511, "step": 289000 }, { "epoch": 2.71, "eval_loss": 1.1883938312530518, "eval_runtime": 221.884, "eval_samples_per_second": 450.686, "eval_steps_per_second": 14.084, "step": 289000 }, { "epoch": 2.72, "learning_rate": 2.2832717103657967e-05, "loss": 1.3486, "step": 289500 }, { "epoch": 2.72, "eval_loss": 1.185508370399475, "eval_runtime": 221.7951, "eval_samples_per_second": 450.867, "eval_steps_per_second": 14.09, "step": 289500 }, { "epoch": 2.72, "learning_rate": 2.2785796062386216e-05, "loss": 1.3584, "step": 290000 }, { "epoch": 2.72, "eval_loss": 1.1850634813308716, "eval_runtime": 221.7253, "eval_samples_per_second": 451.009, "eval_steps_per_second": 14.094, "step": 290000 }, { "epoch": 2.73, "learning_rate": 2.273887502111447e-05, "loss": 1.3634, "step": 290500 }, { "epoch": 2.73, "eval_loss": 1.1823251247406006, "eval_runtime": 221.5435, "eval_samples_per_second": 451.379, "eval_steps_per_second": 14.106, "step": 290500 }, { "epoch": 2.73, "learning_rate": 2.2691953979842723e-05, "loss": 1.3652, "step": 291000 }, { "epoch": 2.73, "eval_loss": 1.181983232498169, "eval_runtime": 221.5124, "eval_samples_per_second": 451.442, "eval_steps_per_second": 14.108, "step": 291000 }, { "epoch": 2.74, "learning_rate": 2.2645032938570973e-05, "loss": 1.3612, "step": 291500 }, { "epoch": 2.74, "eval_loss": 1.182550311088562, "eval_runtime": 221.4655, "eval_samples_per_second": 451.538, "eval_steps_per_second": 14.111, "step": 291500 }, { "epoch": 2.74, "learning_rate": 2.2598111897299226e-05, "loss": 1.3827, "step": 292000 }, { "epoch": 2.74, "eval_loss": 1.1845837831497192, "eval_runtime": 221.4927, "eval_samples_per_second": 451.482, "eval_steps_per_second": 14.109, "step": 292000 }, { "epoch": 2.74, "learning_rate": 2.255119085602748e-05, "loss": 1.3722, "step": 292500 }, { "epoch": 2.74, "eval_loss": 1.1796345710754395, "eval_runtime": 221.4954, "eval_samples_per_second": 451.477, "eval_steps_per_second": 14.109, "step": 292500 }, { "epoch": 2.75, "learning_rate": 2.250426981475573e-05, "loss": 1.3608, "step": 293000 }, { "epoch": 2.75, "eval_loss": 1.1794308423995972, "eval_runtime": 221.4654, "eval_samples_per_second": 451.538, "eval_steps_per_second": 14.111, "step": 293000 }, { "epoch": 2.75, "learning_rate": 2.2457348773483983e-05, "loss": 1.3601, "step": 293500 }, { "epoch": 2.75, "eval_loss": 1.1854252815246582, "eval_runtime": 221.506, "eval_samples_per_second": 451.455, "eval_steps_per_second": 14.108, "step": 293500 }, { "epoch": 2.76, "learning_rate": 2.2410427732212233e-05, "loss": 1.3723, "step": 294000 }, { "epoch": 2.76, "eval_loss": 1.1760536432266235, "eval_runtime": 221.4862, "eval_samples_per_second": 451.495, "eval_steps_per_second": 14.109, "step": 294000 }, { "epoch": 2.76, "learning_rate": 2.2363506690940486e-05, "loss": 1.3559, "step": 294500 }, { "epoch": 2.76, "eval_loss": 1.1737596988677979, "eval_runtime": 221.496, "eval_samples_per_second": 451.475, "eval_steps_per_second": 14.109, "step": 294500 }, { "epoch": 2.77, "learning_rate": 2.231658564966874e-05, "loss": 1.3596, "step": 295000 }, { "epoch": 2.77, "eval_loss": 1.1840267181396484, "eval_runtime": 221.4671, "eval_samples_per_second": 451.534, "eval_steps_per_second": 14.11, "step": 295000 }, { "epoch": 2.77, "learning_rate": 2.226966460839699e-05, "loss": 1.3469, "step": 295500 }, { "epoch": 2.77, "eval_loss": 1.1722980737686157, "eval_runtime": 221.4589, "eval_samples_per_second": 451.551, "eval_steps_per_second": 14.111, "step": 295500 }, { "epoch": 2.78, "learning_rate": 2.2222743567125242e-05, "loss": 1.3636, "step": 296000 }, { "epoch": 2.78, "eval_loss": 1.1743708848953247, "eval_runtime": 221.6515, "eval_samples_per_second": 451.159, "eval_steps_per_second": 14.099, "step": 296000 }, { "epoch": 2.78, "learning_rate": 2.2175822525853496e-05, "loss": 1.3522, "step": 296500 }, { "epoch": 2.78, "eval_loss": 1.1773130893707275, "eval_runtime": 221.623, "eval_samples_per_second": 451.217, "eval_steps_per_second": 14.101, "step": 296500 }, { "epoch": 2.79, "learning_rate": 2.2128901484581746e-05, "loss": 1.3459, "step": 297000 }, { "epoch": 2.79, "eval_loss": 1.1674100160598755, "eval_runtime": 221.5803, "eval_samples_per_second": 451.304, "eval_steps_per_second": 14.103, "step": 297000 }, { "epoch": 2.79, "learning_rate": 2.208198044331e-05, "loss": 1.3634, "step": 297500 }, { "epoch": 2.79, "eval_loss": 1.1741117238998413, "eval_runtime": 221.517, "eval_samples_per_second": 451.433, "eval_steps_per_second": 14.107, "step": 297500 }, { "epoch": 2.8, "learning_rate": 2.2035059402038252e-05, "loss": 1.3529, "step": 298000 }, { "epoch": 2.8, "eval_loss": 1.1743310689926147, "eval_runtime": 221.3651, "eval_samples_per_second": 451.742, "eval_steps_per_second": 14.117, "step": 298000 }, { "epoch": 2.8, "learning_rate": 2.1988138360766502e-05, "loss": 1.3524, "step": 298500 }, { "epoch": 2.8, "eval_loss": 1.172131896018982, "eval_runtime": 221.343, "eval_samples_per_second": 451.788, "eval_steps_per_second": 14.118, "step": 298500 }, { "epoch": 2.81, "learning_rate": 2.1941217319494755e-05, "loss": 1.349, "step": 299000 }, { "epoch": 2.81, "eval_loss": 1.1744838953018188, "eval_runtime": 221.3399, "eval_samples_per_second": 451.794, "eval_steps_per_second": 14.119, "step": 299000 }, { "epoch": 2.81, "learning_rate": 2.1894296278223005e-05, "loss": 1.3658, "step": 299500 }, { "epoch": 2.81, "eval_loss": 1.167363166809082, "eval_runtime": 221.371, "eval_samples_per_second": 451.73, "eval_steps_per_second": 14.117, "step": 299500 }, { "epoch": 2.82, "learning_rate": 2.184737523695126e-05, "loss": 1.3663, "step": 300000 }, { "epoch": 2.82, "eval_loss": 1.1738266944885254, "eval_runtime": 221.3431, "eval_samples_per_second": 451.787, "eval_steps_per_second": 14.118, "step": 300000 }, { "epoch": 2.82, "learning_rate": 2.1800454195679512e-05, "loss": 1.3473, "step": 300500 }, { "epoch": 2.82, "eval_loss": 1.173740267753601, "eval_runtime": 221.0442, "eval_samples_per_second": 452.398, "eval_steps_per_second": 14.137, "step": 300500 }, { "epoch": 2.82, "learning_rate": 2.175353315440776e-05, "loss": 1.3589, "step": 301000 }, { "epoch": 2.82, "eval_loss": 1.1683049201965332, "eval_runtime": 221.0696, "eval_samples_per_second": 452.346, "eval_steps_per_second": 14.136, "step": 301000 }, { "epoch": 2.83, "learning_rate": 2.1706612113136015e-05, "loss": 1.347, "step": 301500 }, { "epoch": 2.83, "eval_loss": 1.1638891696929932, "eval_runtime": 221.0645, "eval_samples_per_second": 452.357, "eval_steps_per_second": 14.136, "step": 301500 }, { "epoch": 2.83, "learning_rate": 2.1659691071864268e-05, "loss": 1.3421, "step": 302000 }, { "epoch": 2.83, "eval_loss": 1.1762233972549438, "eval_runtime": 221.1041, "eval_samples_per_second": 452.276, "eval_steps_per_second": 14.134, "step": 302000 }, { "epoch": 2.84, "learning_rate": 2.1612770030592518e-05, "loss": 1.3507, "step": 302500 }, { "epoch": 2.84, "eval_loss": 1.17283034324646, "eval_runtime": 221.1012, "eval_samples_per_second": 452.282, "eval_steps_per_second": 14.134, "step": 302500 }, { "epoch": 2.84, "learning_rate": 2.156584898932077e-05, "loss": 1.3247, "step": 303000 }, { "epoch": 2.84, "eval_loss": 1.1722640991210938, "eval_runtime": 221.0672, "eval_samples_per_second": 452.351, "eval_steps_per_second": 14.136, "step": 303000 }, { "epoch": 2.85, "learning_rate": 2.1518927948049025e-05, "loss": 1.3373, "step": 303500 }, { "epoch": 2.85, "eval_loss": 1.1675173044204712, "eval_runtime": 221.061, "eval_samples_per_second": 452.364, "eval_steps_per_second": 14.136, "step": 303500 }, { "epoch": 2.85, "learning_rate": 2.1472006906777275e-05, "loss": 1.347, "step": 304000 }, { "epoch": 2.85, "eval_loss": 1.169805645942688, "eval_runtime": 221.2478, "eval_samples_per_second": 451.982, "eval_steps_per_second": 14.124, "step": 304000 }, { "epoch": 2.86, "learning_rate": 2.1425085865505528e-05, "loss": 1.3549, "step": 304500 }, { "epoch": 2.86, "eval_loss": 1.1683489084243774, "eval_runtime": 221.2569, "eval_samples_per_second": 451.963, "eval_steps_per_second": 14.124, "step": 304500 }, { "epoch": 2.86, "learning_rate": 2.137816482423378e-05, "loss": 1.3406, "step": 305000 }, { "epoch": 2.86, "eval_loss": 1.1658775806427002, "eval_runtime": 221.1911, "eval_samples_per_second": 452.098, "eval_steps_per_second": 14.128, "step": 305000 }, { "epoch": 2.87, "learning_rate": 2.133124378296203e-05, "loss": 1.3497, "step": 305500 }, { "epoch": 2.87, "eval_loss": 1.1645755767822266, "eval_runtime": 221.1253, "eval_samples_per_second": 452.232, "eval_steps_per_second": 14.132, "step": 305500 }, { "epoch": 2.87, "learning_rate": 2.1284322741690284e-05, "loss": 1.3615, "step": 306000 }, { "epoch": 2.87, "eval_loss": 1.1647412776947021, "eval_runtime": 220.9938, "eval_samples_per_second": 452.501, "eval_steps_per_second": 14.141, "step": 306000 }, { "epoch": 2.88, "learning_rate": 2.1237401700418534e-05, "loss": 1.3281, "step": 306500 }, { "epoch": 2.88, "eval_loss": 1.1673572063446045, "eval_runtime": 220.9974, "eval_samples_per_second": 452.494, "eval_steps_per_second": 14.14, "step": 306500 }, { "epoch": 2.88, "learning_rate": 2.1190480659146787e-05, "loss": 1.3447, "step": 307000 }, { "epoch": 2.88, "eval_loss": 1.1644020080566406, "eval_runtime": 221.0059, "eval_samples_per_second": 452.477, "eval_steps_per_second": 14.14, "step": 307000 }, { "epoch": 2.89, "learning_rate": 2.114355961787504e-05, "loss": 1.3518, "step": 307500 }, { "epoch": 2.89, "eval_loss": 1.1606210470199585, "eval_runtime": 220.9981, "eval_samples_per_second": 452.493, "eval_steps_per_second": 14.14, "step": 307500 }, { "epoch": 2.89, "learning_rate": 2.109663857660329e-05, "loss": 1.3268, "step": 308000 }, { "epoch": 2.89, "eval_loss": 1.172353982925415, "eval_runtime": 220.9928, "eval_samples_per_second": 452.503, "eval_steps_per_second": 14.141, "step": 308000 }, { "epoch": 2.9, "learning_rate": 2.1049717535331544e-05, "loss": 1.3281, "step": 308500 }, { "epoch": 2.9, "eval_loss": 1.1636762619018555, "eval_runtime": 221.0284, "eval_samples_per_second": 452.431, "eval_steps_per_second": 14.138, "step": 308500 }, { "epoch": 2.9, "learning_rate": 2.1002796494059797e-05, "loss": 1.3361, "step": 309000 }, { "epoch": 2.9, "eval_loss": 1.1628670692443848, "eval_runtime": 221.0125, "eval_samples_per_second": 452.463, "eval_steps_per_second": 14.139, "step": 309000 }, { "epoch": 2.9, "learning_rate": 2.0955875452788047e-05, "loss": 1.3297, "step": 309500 }, { "epoch": 2.9, "eval_loss": 1.1588451862335205, "eval_runtime": 221.003, "eval_samples_per_second": 452.482, "eval_steps_per_second": 14.14, "step": 309500 }, { "epoch": 2.91, "learning_rate": 2.09089544115163e-05, "loss": 1.3492, "step": 310000 }, { "epoch": 2.91, "eval_loss": 1.1633425951004028, "eval_runtime": 220.9707, "eval_samples_per_second": 452.549, "eval_steps_per_second": 14.142, "step": 310000 }, { "epoch": 2.91, "learning_rate": 2.0862033370244554e-05, "loss": 1.3419, "step": 310500 }, { "epoch": 2.91, "eval_loss": 1.160988211631775, "eval_runtime": 220.9533, "eval_samples_per_second": 452.584, "eval_steps_per_second": 14.143, "step": 310500 }, { "epoch": 2.92, "learning_rate": 2.0815112328972804e-05, "loss": 1.3277, "step": 311000 }, { "epoch": 2.92, "eval_loss": 1.1551365852355957, "eval_runtime": 220.9421, "eval_samples_per_second": 452.607, "eval_steps_per_second": 14.144, "step": 311000 }, { "epoch": 2.92, "learning_rate": 2.0768191287701057e-05, "loss": 1.3402, "step": 311500 }, { "epoch": 2.92, "eval_loss": 1.163130283355713, "eval_runtime": 220.9627, "eval_samples_per_second": 452.565, "eval_steps_per_second": 14.143, "step": 311500 }, { "epoch": 2.93, "learning_rate": 2.072127024642931e-05, "loss": 1.329, "step": 312000 }, { "epoch": 2.93, "eval_loss": 1.1555798053741455, "eval_runtime": 221.0607, "eval_samples_per_second": 452.364, "eval_steps_per_second": 14.136, "step": 312000 }, { "epoch": 2.93, "learning_rate": 2.067434920515756e-05, "loss": 1.3467, "step": 312500 }, { "epoch": 2.93, "eval_loss": 1.1523679494857788, "eval_runtime": 221.1292, "eval_samples_per_second": 452.224, "eval_steps_per_second": 14.132, "step": 312500 }, { "epoch": 2.94, "learning_rate": 2.0627428163885813e-05, "loss": 1.3574, "step": 313000 }, { "epoch": 2.94, "eval_loss": 1.159830093383789, "eval_runtime": 221.0551, "eval_samples_per_second": 452.376, "eval_steps_per_second": 14.137, "step": 313000 }, { "epoch": 2.94, "learning_rate": 2.0580507122614067e-05, "loss": 1.343, "step": 313500 }, { "epoch": 2.94, "eval_loss": 1.1504855155944824, "eval_runtime": 221.0384, "eval_samples_per_second": 452.41, "eval_steps_per_second": 14.138, "step": 313500 }, { "epoch": 2.95, "learning_rate": 2.0533586081342317e-05, "loss": 1.3234, "step": 314000 }, { "epoch": 2.95, "eval_loss": 1.1518688201904297, "eval_runtime": 220.9554, "eval_samples_per_second": 452.58, "eval_steps_per_second": 14.143, "step": 314000 }, { "epoch": 2.95, "learning_rate": 2.048666504007057e-05, "loss": 1.3183, "step": 314500 }, { "epoch": 2.95, "eval_loss": 1.159049391746521, "eval_runtime": 220.9401, "eval_samples_per_second": 452.611, "eval_steps_per_second": 14.144, "step": 314500 }, { "epoch": 2.96, "learning_rate": 2.0439743998798823e-05, "loss": 1.3662, "step": 315000 }, { "epoch": 2.96, "eval_loss": 1.15578031539917, "eval_runtime": 220.905, "eval_samples_per_second": 452.683, "eval_steps_per_second": 14.146, "step": 315000 }, { "epoch": 2.96, "learning_rate": 2.0392822957527073e-05, "loss": 1.3285, "step": 315500 }, { "epoch": 2.96, "eval_loss": 1.1557625532150269, "eval_runtime": 220.9213, "eval_samples_per_second": 452.65, "eval_steps_per_second": 14.145, "step": 315500 }, { "epoch": 2.97, "learning_rate": 2.0345901916255326e-05, "loss": 1.3375, "step": 316000 }, { "epoch": 2.97, "eval_loss": 1.153180480003357, "eval_runtime": 220.9165, "eval_samples_per_second": 452.66, "eval_steps_per_second": 14.146, "step": 316000 }, { "epoch": 2.97, "learning_rate": 2.029898087498358e-05, "loss": 1.3208, "step": 316500 }, { "epoch": 2.97, "eval_loss": 1.1530534029006958, "eval_runtime": 220.9013, "eval_samples_per_second": 452.691, "eval_steps_per_second": 14.147, "step": 316500 }, { "epoch": 2.97, "learning_rate": 2.025205983371183e-05, "loss": 1.3245, "step": 317000 }, { "epoch": 2.97, "eval_loss": 1.152458667755127, "eval_runtime": 220.9087, "eval_samples_per_second": 452.676, "eval_steps_per_second": 14.146, "step": 317000 }, { "epoch": 2.98, "learning_rate": 2.0205138792440083e-05, "loss": 1.3383, "step": 317500 }, { "epoch": 2.98, "eval_loss": 1.154115915298462, "eval_runtime": 220.909, "eval_samples_per_second": 452.675, "eval_steps_per_second": 14.146, "step": 317500 }, { "epoch": 2.98, "learning_rate": 2.0158217751168336e-05, "loss": 1.3139, "step": 318000 }, { "epoch": 2.98, "eval_loss": 1.1526515483856201, "eval_runtime": 220.8871, "eval_samples_per_second": 452.72, "eval_steps_per_second": 14.147, "step": 318000 }, { "epoch": 2.99, "learning_rate": 2.011129670989659e-05, "loss": 1.318, "step": 318500 }, { "epoch": 2.99, "eval_loss": 1.1521937847137451, "eval_runtime": 220.928, "eval_samples_per_second": 452.636, "eval_steps_per_second": 14.145, "step": 318500 }, { "epoch": 2.99, "learning_rate": 2.006437566862484e-05, "loss": 1.3276, "step": 319000 }, { "epoch": 2.99, "eval_loss": 1.1527777910232544, "eval_runtime": 220.9248, "eval_samples_per_second": 452.643, "eval_steps_per_second": 14.145, "step": 319000 }, { "epoch": 3.0, "learning_rate": 2.0017454627353092e-05, "loss": 1.3318, "step": 319500 }, { "epoch": 3.0, "eval_loss": 1.1514686346054077, "eval_runtime": 220.8852, "eval_samples_per_second": 452.724, "eval_steps_per_second": 14.148, "step": 319500 }, { "epoch": 3.0, "learning_rate": 1.9970533586081346e-05, "loss": 1.3396, "step": 320000 }, { "epoch": 3.0, "eval_loss": 1.153663158416748, "eval_runtime": 220.9135, "eval_samples_per_second": 452.666, "eval_steps_per_second": 14.146, "step": 320000 }, { "epoch": 3.01, "learning_rate": 1.9923612544809596e-05, "loss": 1.3293, "step": 320500 }, { "epoch": 3.01, "eval_loss": 1.1468321084976196, "eval_runtime": 220.9867, "eval_samples_per_second": 452.516, "eval_steps_per_second": 14.141, "step": 320500 }, { "epoch": 3.01, "learning_rate": 1.987669150353785e-05, "loss": 1.3166, "step": 321000 }, { "epoch": 3.01, "eval_loss": 1.1455674171447754, "eval_runtime": 221.0441, "eval_samples_per_second": 452.398, "eval_steps_per_second": 14.137, "step": 321000 }, { "epoch": 3.02, "learning_rate": 1.9829770462266102e-05, "loss": 1.3082, "step": 321500 }, { "epoch": 3.02, "eval_loss": 1.1440938711166382, "eval_runtime": 220.9927, "eval_samples_per_second": 452.504, "eval_steps_per_second": 14.141, "step": 321500 }, { "epoch": 3.02, "learning_rate": 1.9782849420994352e-05, "loss": 1.3352, "step": 322000 }, { "epoch": 3.02, "eval_loss": 1.1490601301193237, "eval_runtime": 220.9673, "eval_samples_per_second": 452.556, "eval_steps_per_second": 14.142, "step": 322000 }, { "epoch": 3.03, "learning_rate": 1.9735928379722605e-05, "loss": 1.3217, "step": 322500 }, { "epoch": 3.03, "eval_loss": 1.1430158615112305, "eval_runtime": 220.8559, "eval_samples_per_second": 452.784, "eval_steps_per_second": 14.149, "step": 322500 }, { "epoch": 3.03, "learning_rate": 1.968900733845086e-05, "loss": 1.2891, "step": 323000 }, { "epoch": 3.03, "eval_loss": 1.151960849761963, "eval_runtime": 220.8197, "eval_samples_per_second": 452.858, "eval_steps_per_second": 14.152, "step": 323000 }, { "epoch": 3.04, "learning_rate": 1.964208629717911e-05, "loss": 1.3187, "step": 323500 }, { "epoch": 3.04, "eval_loss": 1.1531556844711304, "eval_runtime": 220.8223, "eval_samples_per_second": 452.853, "eval_steps_per_second": 14.152, "step": 323500 }, { "epoch": 3.04, "learning_rate": 1.9595165255907362e-05, "loss": 1.3057, "step": 324000 }, { "epoch": 3.04, "eval_loss": 1.1435818672180176, "eval_runtime": 220.8506, "eval_samples_per_second": 452.795, "eval_steps_per_second": 14.15, "step": 324000 }, { "epoch": 3.05, "learning_rate": 1.9548244214635612e-05, "loss": 1.3006, "step": 324500 }, { "epoch": 3.05, "eval_loss": 1.1423208713531494, "eval_runtime": 220.8327, "eval_samples_per_second": 452.832, "eval_steps_per_second": 14.151, "step": 324500 }, { "epoch": 3.05, "learning_rate": 1.9501323173363865e-05, "loss": 1.3062, "step": 325000 }, { "epoch": 3.05, "eval_loss": 1.1395783424377441, "eval_runtime": 220.8318, "eval_samples_per_second": 452.833, "eval_steps_per_second": 14.151, "step": 325000 }, { "epoch": 3.05, "learning_rate": 1.945440213209212e-05, "loss": 1.2993, "step": 325500 }, { "epoch": 3.05, "eval_loss": 1.139084815979004, "eval_runtime": 220.827, "eval_samples_per_second": 452.843, "eval_steps_per_second": 14.151, "step": 325500 }, { "epoch": 3.06, "learning_rate": 1.9407481090820368e-05, "loss": 1.3258, "step": 326000 }, { "epoch": 3.06, "eval_loss": 1.1458795070648193, "eval_runtime": 220.8245, "eval_samples_per_second": 452.848, "eval_steps_per_second": 14.152, "step": 326000 }, { "epoch": 3.06, "learning_rate": 1.936056004954862e-05, "loss": 1.3166, "step": 326500 }, { "epoch": 3.06, "eval_loss": 1.1415066719055176, "eval_runtime": 220.8333, "eval_samples_per_second": 452.83, "eval_steps_per_second": 14.151, "step": 326500 }, { "epoch": 3.07, "learning_rate": 1.9313639008276875e-05, "loss": 1.3062, "step": 327000 }, { "epoch": 3.07, "eval_loss": 1.141654372215271, "eval_runtime": 220.8651, "eval_samples_per_second": 452.765, "eval_steps_per_second": 14.149, "step": 327000 }, { "epoch": 3.07, "learning_rate": 1.9266717967005125e-05, "loss": 1.3091, "step": 327500 }, { "epoch": 3.07, "eval_loss": 1.1461706161499023, "eval_runtime": 220.8785, "eval_samples_per_second": 452.738, "eval_steps_per_second": 14.148, "step": 327500 }, { "epoch": 3.08, "learning_rate": 1.9219796925733378e-05, "loss": 1.2958, "step": 328000 }, { "epoch": 3.08, "eval_loss": 1.1399269104003906, "eval_runtime": 220.8706, "eval_samples_per_second": 452.754, "eval_steps_per_second": 14.149, "step": 328000 }, { "epoch": 3.08, "learning_rate": 1.917287588446163e-05, "loss": 1.3067, "step": 328500 }, { "epoch": 3.08, "eval_loss": 1.144376516342163, "eval_runtime": 220.8285, "eval_samples_per_second": 452.84, "eval_steps_per_second": 14.151, "step": 328500 }, { "epoch": 3.09, "learning_rate": 1.912595484318988e-05, "loss": 1.319, "step": 329000 }, { "epoch": 3.09, "eval_loss": 1.1402838230133057, "eval_runtime": 220.844, "eval_samples_per_second": 452.808, "eval_steps_per_second": 14.15, "step": 329000 }, { "epoch": 3.09, "learning_rate": 1.9079033801918134e-05, "loss": 1.3399, "step": 329500 }, { "epoch": 3.09, "eval_loss": 1.139337182044983, "eval_runtime": 220.9399, "eval_samples_per_second": 452.612, "eval_steps_per_second": 14.144, "step": 329500 }, { "epoch": 3.1, "learning_rate": 1.9032112760646384e-05, "loss": 1.3077, "step": 330000 }, { "epoch": 3.1, "eval_loss": 1.1460225582122803, "eval_runtime": 220.9227, "eval_samples_per_second": 452.647, "eval_steps_per_second": 14.145, "step": 330000 }, { "epoch": 3.1, "learning_rate": 1.8985191719374638e-05, "loss": 1.2905, "step": 330500 }, { "epoch": 3.1, "eval_loss": 1.1372867822647095, "eval_runtime": 220.8826, "eval_samples_per_second": 452.729, "eval_steps_per_second": 14.148, "step": 330500 }, { "epoch": 3.11, "learning_rate": 1.893827067810289e-05, "loss": 1.3308, "step": 331000 }, { "epoch": 3.11, "eval_loss": 1.1457699537277222, "eval_runtime": 220.9045, "eval_samples_per_second": 452.684, "eval_steps_per_second": 14.146, "step": 331000 }, { "epoch": 3.11, "learning_rate": 1.889134963683114e-05, "loss": 1.311, "step": 331500 }, { "epoch": 3.11, "eval_loss": 1.1365880966186523, "eval_runtime": 220.7213, "eval_samples_per_second": 453.06, "eval_steps_per_second": 14.158, "step": 331500 }, { "epoch": 3.12, "learning_rate": 1.8844428595559394e-05, "loss": 1.3123, "step": 332000 }, { "epoch": 3.12, "eval_loss": 1.1372052431106567, "eval_runtime": 220.7014, "eval_samples_per_second": 453.101, "eval_steps_per_second": 14.159, "step": 332000 }, { "epoch": 3.12, "learning_rate": 1.8797507554287647e-05, "loss": 1.29, "step": 332500 }, { "epoch": 3.12, "eval_loss": 1.1368725299835205, "eval_runtime": 220.7248, "eval_samples_per_second": 453.053, "eval_steps_per_second": 14.158, "step": 332500 }, { "epoch": 3.12, "learning_rate": 1.8750586513015897e-05, "loss": 1.2973, "step": 333000 }, { "epoch": 3.12, "eval_loss": 1.1378732919692993, "eval_runtime": 220.7445, "eval_samples_per_second": 453.012, "eval_steps_per_second": 14.157, "step": 333000 }, { "epoch": 3.13, "learning_rate": 1.870366547174415e-05, "loss": 1.294, "step": 333500 }, { "epoch": 3.13, "eval_loss": 1.134841799736023, "eval_runtime": 220.7851, "eval_samples_per_second": 452.929, "eval_steps_per_second": 14.154, "step": 333500 }, { "epoch": 3.13, "learning_rate": 1.8656744430472404e-05, "loss": 1.3215, "step": 334000 }, { "epoch": 3.13, "eval_loss": 1.135761022567749, "eval_runtime": 220.7626, "eval_samples_per_second": 452.975, "eval_steps_per_second": 14.155, "step": 334000 }, { "epoch": 3.14, "learning_rate": 1.8609823389200654e-05, "loss": 1.3083, "step": 334500 }, { "epoch": 3.14, "eval_loss": 1.135439157485962, "eval_runtime": 220.7917, "eval_samples_per_second": 452.916, "eval_steps_per_second": 14.154, "step": 334500 }, { "epoch": 3.14, "learning_rate": 1.8562902347928907e-05, "loss": 1.2956, "step": 335000 }, { "epoch": 3.14, "eval_loss": 1.1306705474853516, "eval_runtime": 220.7752, "eval_samples_per_second": 452.949, "eval_steps_per_second": 14.155, "step": 335000 }, { "epoch": 3.15, "learning_rate": 1.8515981306657157e-05, "loss": 1.2909, "step": 335500 }, { "epoch": 3.15, "eval_loss": 1.1373645067214966, "eval_runtime": 220.7747, "eval_samples_per_second": 452.95, "eval_steps_per_second": 14.155, "step": 335500 }, { "epoch": 3.15, "learning_rate": 1.846906026538541e-05, "loss": 1.3079, "step": 336000 }, { "epoch": 3.15, "eval_loss": 1.1389541625976562, "eval_runtime": 220.7343, "eval_samples_per_second": 453.033, "eval_steps_per_second": 14.157, "step": 336000 }, { "epoch": 3.16, "learning_rate": 1.8422139224113664e-05, "loss": 1.2948, "step": 336500 }, { "epoch": 3.16, "eval_loss": 1.13786780834198, "eval_runtime": 220.7342, "eval_samples_per_second": 453.034, "eval_steps_per_second": 14.157, "step": 336500 }, { "epoch": 3.16, "learning_rate": 1.8375218182841913e-05, "loss": 1.287, "step": 337000 }, { "epoch": 3.16, "eval_loss": 1.1371397972106934, "eval_runtime": 220.7289, "eval_samples_per_second": 453.044, "eval_steps_per_second": 14.158, "step": 337000 }, { "epoch": 3.17, "learning_rate": 1.8328297141570167e-05, "loss": 1.3254, "step": 337500 }, { "epoch": 3.17, "eval_loss": 1.1344914436340332, "eval_runtime": 220.7461, "eval_samples_per_second": 453.009, "eval_steps_per_second": 14.157, "step": 337500 }, { "epoch": 3.17, "learning_rate": 1.828137610029842e-05, "loss": 1.2886, "step": 338000 }, { "epoch": 3.17, "eval_loss": 1.1374002695083618, "eval_runtime": 220.7432, "eval_samples_per_second": 453.015, "eval_steps_per_second": 14.157, "step": 338000 }, { "epoch": 3.18, "learning_rate": 1.823445505902667e-05, "loss": 1.3021, "step": 338500 }, { "epoch": 3.18, "eval_loss": 1.1403025388717651, "eval_runtime": 220.7142, "eval_samples_per_second": 453.075, "eval_steps_per_second": 14.159, "step": 338500 }, { "epoch": 3.18, "learning_rate": 1.8187534017754923e-05, "loss": 1.2922, "step": 339000 }, { "epoch": 3.18, "eval_loss": 1.1281777620315552, "eval_runtime": 220.889, "eval_samples_per_second": 452.716, "eval_steps_per_second": 14.147, "step": 339000 }, { "epoch": 3.19, "learning_rate": 1.8140612976483176e-05, "loss": 1.305, "step": 339500 }, { "epoch": 3.19, "eval_loss": 1.1293456554412842, "eval_runtime": 220.8438, "eval_samples_per_second": 452.809, "eval_steps_per_second": 14.15, "step": 339500 }, { "epoch": 3.19, "learning_rate": 1.8093691935211426e-05, "loss": 1.3056, "step": 340000 }, { "epoch": 3.19, "eval_loss": 1.1341973543167114, "eval_runtime": 220.8184, "eval_samples_per_second": 452.861, "eval_steps_per_second": 14.152, "step": 340000 }, { "epoch": 3.2, "learning_rate": 1.804677089393968e-05, "loss": 1.2914, "step": 340500 }, { "epoch": 3.2, "eval_loss": 1.133408546447754, "eval_runtime": 220.7019, "eval_samples_per_second": 453.1, "eval_steps_per_second": 14.159, "step": 340500 }, { "epoch": 3.2, "learning_rate": 1.799984985266793e-05, "loss": 1.3, "step": 341000 }, { "epoch": 3.2, "eval_loss": 1.1342601776123047, "eval_runtime": 220.5902, "eval_samples_per_second": 453.329, "eval_steps_per_second": 14.167, "step": 341000 }, { "epoch": 3.2, "learning_rate": 1.7952928811396183e-05, "loss": 1.2899, "step": 341500 }, { "epoch": 3.2, "eval_loss": 1.1248433589935303, "eval_runtime": 220.5614, "eval_samples_per_second": 453.388, "eval_steps_per_second": 14.168, "step": 341500 }, { "epoch": 3.21, "learning_rate": 1.7906007770124436e-05, "loss": 1.315, "step": 342000 }, { "epoch": 3.21, "eval_loss": 1.1279655694961548, "eval_runtime": 220.57, "eval_samples_per_second": 453.371, "eval_steps_per_second": 14.168, "step": 342000 }, { "epoch": 3.21, "learning_rate": 1.7859086728852686e-05, "loss": 1.3153, "step": 342500 }, { "epoch": 3.21, "eval_loss": 1.125070571899414, "eval_runtime": 220.6119, "eval_samples_per_second": 453.285, "eval_steps_per_second": 14.165, "step": 342500 }, { "epoch": 3.22, "learning_rate": 1.781216568758094e-05, "loss": 1.3028, "step": 343000 }, { "epoch": 3.22, "eval_loss": 1.1249561309814453, "eval_runtime": 220.5892, "eval_samples_per_second": 453.331, "eval_steps_per_second": 14.167, "step": 343000 }, { "epoch": 3.22, "learning_rate": 1.7765244646309193e-05, "loss": 1.2917, "step": 343500 }, { "epoch": 3.22, "eval_loss": 1.1268163919448853, "eval_runtime": 220.6095, "eval_samples_per_second": 453.29, "eval_steps_per_second": 14.165, "step": 343500 }, { "epoch": 3.23, "learning_rate": 1.7718323605037442e-05, "loss": 1.3129, "step": 344000 }, { "epoch": 3.23, "eval_loss": 1.130949854850769, "eval_runtime": 220.5805, "eval_samples_per_second": 453.349, "eval_steps_per_second": 14.167, "step": 344000 }, { "epoch": 3.23, "learning_rate": 1.7671402563765696e-05, "loss": 1.3006, "step": 344500 }, { "epoch": 3.23, "eval_loss": 1.130674958229065, "eval_runtime": 220.5791, "eval_samples_per_second": 453.352, "eval_steps_per_second": 14.167, "step": 344500 }, { "epoch": 3.24, "learning_rate": 1.762448152249395e-05, "loss": 1.296, "step": 345000 }, { "epoch": 3.24, "eval_loss": 1.1254706382751465, "eval_runtime": 220.6122, "eval_samples_per_second": 453.284, "eval_steps_per_second": 14.165, "step": 345000 }, { "epoch": 3.24, "learning_rate": 1.75775604812222e-05, "loss": 1.2797, "step": 345500 }, { "epoch": 3.24, "eval_loss": 1.1241164207458496, "eval_runtime": 220.5527, "eval_samples_per_second": 453.406, "eval_steps_per_second": 14.169, "step": 345500 }, { "epoch": 3.25, "learning_rate": 1.7530639439950452e-05, "loss": 1.2894, "step": 346000 }, { "epoch": 3.25, "eval_loss": 1.1250615119934082, "eval_runtime": 220.5648, "eval_samples_per_second": 453.381, "eval_steps_per_second": 14.168, "step": 346000 }, { "epoch": 3.25, "learning_rate": 1.7483718398678706e-05, "loss": 1.2935, "step": 346500 }, { "epoch": 3.25, "eval_loss": 1.1238473653793335, "eval_runtime": 220.5533, "eval_samples_per_second": 453.405, "eval_steps_per_second": 14.169, "step": 346500 }, { "epoch": 3.26, "learning_rate": 1.7436797357406955e-05, "loss": 1.2986, "step": 347000 }, { "epoch": 3.26, "eval_loss": 1.1256763935089111, "eval_runtime": 220.5765, "eval_samples_per_second": 453.358, "eval_steps_per_second": 14.167, "step": 347000 }, { "epoch": 3.26, "learning_rate": 1.738987631613521e-05, "loss": 1.3051, "step": 347500 }, { "epoch": 3.26, "eval_loss": 1.1246867179870605, "eval_runtime": 220.5788, "eval_samples_per_second": 453.353, "eval_steps_per_second": 14.167, "step": 347500 }, { "epoch": 3.27, "learning_rate": 1.734295527486346e-05, "loss": 1.2825, "step": 348000 }, { "epoch": 3.27, "eval_loss": 1.1220121383666992, "eval_runtime": 220.5841, "eval_samples_per_second": 453.342, "eval_steps_per_second": 14.167, "step": 348000 }, { "epoch": 3.27, "learning_rate": 1.7296034233591712e-05, "loss": 1.3064, "step": 348500 }, { "epoch": 3.27, "eval_loss": 1.125480055809021, "eval_runtime": 220.6229, "eval_samples_per_second": 453.262, "eval_steps_per_second": 14.164, "step": 348500 }, { "epoch": 3.28, "learning_rate": 1.7249113192319965e-05, "loss": 1.3057, "step": 349000 }, { "epoch": 3.28, "eval_loss": 1.122577428817749, "eval_runtime": 220.7794, "eval_samples_per_second": 452.941, "eval_steps_per_second": 14.154, "step": 349000 }, { "epoch": 3.28, "learning_rate": 1.7202192151048215e-05, "loss": 1.3045, "step": 349500 }, { "epoch": 3.28, "eval_loss": 1.1199777126312256, "eval_runtime": 220.7547, "eval_samples_per_second": 452.991, "eval_steps_per_second": 14.156, "step": 349500 }, { "epoch": 3.28, "learning_rate": 1.715527110977647e-05, "loss": 1.2658, "step": 350000 }, { "epoch": 3.28, "eval_loss": 1.1250447034835815, "eval_runtime": 220.7117, "eval_samples_per_second": 453.08, "eval_steps_per_second": 14.159, "step": 350000 }, { "epoch": 3.29, "learning_rate": 1.710835006850472e-05, "loss": 1.2736, "step": 350500 }, { "epoch": 3.29, "eval_loss": 1.1251693964004517, "eval_runtime": 220.6204, "eval_samples_per_second": 453.267, "eval_steps_per_second": 14.165, "step": 350500 }, { "epoch": 3.29, "learning_rate": 1.706142902723297e-05, "loss": 1.3004, "step": 351000 }, { "epoch": 3.29, "eval_loss": 1.121744155883789, "eval_runtime": 220.47, "eval_samples_per_second": 453.576, "eval_steps_per_second": 14.174, "step": 351000 }, { "epoch": 3.3, "learning_rate": 1.7014507985961225e-05, "loss": 1.3014, "step": 351500 }, { "epoch": 3.3, "eval_loss": 1.1183676719665527, "eval_runtime": 220.4996, "eval_samples_per_second": 453.516, "eval_steps_per_second": 14.172, "step": 351500 }, { "epoch": 3.3, "learning_rate": 1.6967586944689478e-05, "loss": 1.2903, "step": 352000 }, { "epoch": 3.3, "eval_loss": 1.128160834312439, "eval_runtime": 220.4904, "eval_samples_per_second": 453.534, "eval_steps_per_second": 14.173, "step": 352000 }, { "epoch": 3.31, "learning_rate": 1.6920665903417728e-05, "loss": 1.2826, "step": 352500 }, { "epoch": 3.31, "eval_loss": 1.1223293542861938, "eval_runtime": 220.5271, "eval_samples_per_second": 453.459, "eval_steps_per_second": 14.171, "step": 352500 }, { "epoch": 3.31, "learning_rate": 1.687374486214598e-05, "loss": 1.2968, "step": 353000 }, { "epoch": 3.31, "eval_loss": 1.119235873222351, "eval_runtime": 220.499, "eval_samples_per_second": 453.517, "eval_steps_per_second": 14.172, "step": 353000 }, { "epoch": 3.32, "learning_rate": 1.682682382087423e-05, "loss": 1.2715, "step": 353500 }, { "epoch": 3.32, "eval_loss": 1.1275289058685303, "eval_runtime": 220.5053, "eval_samples_per_second": 453.504, "eval_steps_per_second": 14.172, "step": 353500 }, { "epoch": 3.32, "learning_rate": 1.6779902779602484e-05, "loss": 1.2998, "step": 354000 }, { "epoch": 3.32, "eval_loss": 1.1185322999954224, "eval_runtime": 220.5197, "eval_samples_per_second": 453.474, "eval_steps_per_second": 14.171, "step": 354000 }, { "epoch": 3.33, "learning_rate": 1.6732981738330738e-05, "loss": 1.2875, "step": 354500 }, { "epoch": 3.33, "eval_loss": 1.116925597190857, "eval_runtime": 220.5237, "eval_samples_per_second": 453.466, "eval_steps_per_second": 14.171, "step": 354500 }, { "epoch": 3.33, "learning_rate": 1.6686060697058988e-05, "loss": 1.277, "step": 355000 }, { "epoch": 3.33, "eval_loss": 1.116443157196045, "eval_runtime": 220.4994, "eval_samples_per_second": 453.516, "eval_steps_per_second": 14.172, "step": 355000 }, { "epoch": 3.34, "learning_rate": 1.663913965578724e-05, "loss": 1.2764, "step": 355500 }, { "epoch": 3.34, "eval_loss": 1.1129635572433472, "eval_runtime": 220.5171, "eval_samples_per_second": 453.48, "eval_steps_per_second": 14.171, "step": 355500 }, { "epoch": 3.34, "learning_rate": 1.6592218614515494e-05, "loss": 1.2677, "step": 356000 }, { "epoch": 3.34, "eval_loss": 1.1151468753814697, "eval_runtime": 220.4847, "eval_samples_per_second": 453.546, "eval_steps_per_second": 14.173, "step": 356000 }, { "epoch": 3.35, "learning_rate": 1.6545297573243744e-05, "loss": 1.284, "step": 356500 }, { "epoch": 3.35, "eval_loss": 1.1204290390014648, "eval_runtime": 220.4567, "eval_samples_per_second": 453.604, "eval_steps_per_second": 14.175, "step": 356500 }, { "epoch": 3.35, "learning_rate": 1.6498376531971997e-05, "loss": 1.2776, "step": 357000 }, { "epoch": 3.35, "eval_loss": 1.1218979358673096, "eval_runtime": 220.4675, "eval_samples_per_second": 453.582, "eval_steps_per_second": 14.174, "step": 357000 }, { "epoch": 3.35, "learning_rate": 1.645145549070025e-05, "loss": 1.2877, "step": 357500 }, { "epoch": 3.35, "eval_loss": 1.1146986484527588, "eval_runtime": 220.4757, "eval_samples_per_second": 453.565, "eval_steps_per_second": 14.174, "step": 357500 }, { "epoch": 3.36, "learning_rate": 1.64045344494285e-05, "loss": 1.2732, "step": 358000 }, { "epoch": 3.36, "eval_loss": 1.1204952001571655, "eval_runtime": 220.4932, "eval_samples_per_second": 453.529, "eval_steps_per_second": 14.173, "step": 358000 }, { "epoch": 3.36, "learning_rate": 1.6357613408156754e-05, "loss": 1.2887, "step": 358500 }, { "epoch": 3.36, "eval_loss": 1.113034963607788, "eval_runtime": 220.5465, "eval_samples_per_second": 453.419, "eval_steps_per_second": 14.169, "step": 358500 }, { "epoch": 3.37, "learning_rate": 1.6310692366885004e-05, "loss": 1.273, "step": 359000 }, { "epoch": 3.37, "eval_loss": 1.1138362884521484, "eval_runtime": 220.5129, "eval_samples_per_second": 453.488, "eval_steps_per_second": 14.172, "step": 359000 }, { "epoch": 3.37, "learning_rate": 1.6263771325613257e-05, "loss": 1.2645, "step": 359500 }, { "epoch": 3.37, "eval_loss": 1.1159985065460205, "eval_runtime": 220.5661, "eval_samples_per_second": 453.379, "eval_steps_per_second": 14.168, "step": 359500 }, { "epoch": 3.38, "learning_rate": 1.621685028434151e-05, "loss": 1.2909, "step": 360000 }, { "epoch": 3.38, "eval_loss": 1.1123580932617188, "eval_runtime": 220.6926, "eval_samples_per_second": 453.119, "eval_steps_per_second": 14.16, "step": 360000 }, { "epoch": 3.38, "learning_rate": 1.616992924306976e-05, "loss": 1.2871, "step": 360500 }, { "epoch": 3.38, "eval_loss": 1.112741231918335, "eval_runtime": 220.5711, "eval_samples_per_second": 453.369, "eval_steps_per_second": 14.168, "step": 360500 }, { "epoch": 3.39, "learning_rate": 1.6123008201798014e-05, "loss": 1.2848, "step": 361000 }, { "epoch": 3.39, "eval_loss": 1.1092541217803955, "eval_runtime": 220.558, "eval_samples_per_second": 453.396, "eval_steps_per_second": 14.169, "step": 361000 }, { "epoch": 3.39, "learning_rate": 1.6076087160526267e-05, "loss": 1.2805, "step": 361500 }, { "epoch": 3.39, "eval_loss": 1.1068326234817505, "eval_runtime": 220.4445, "eval_samples_per_second": 453.629, "eval_steps_per_second": 14.176, "step": 361500 }, { "epoch": 3.4, "learning_rate": 1.602916611925452e-05, "loss": 1.2747, "step": 362000 }, { "epoch": 3.4, "eval_loss": 1.108471393585205, "eval_runtime": 220.3945, "eval_samples_per_second": 453.732, "eval_steps_per_second": 14.179, "step": 362000 }, { "epoch": 3.4, "learning_rate": 1.598224507798277e-05, "loss": 1.286, "step": 362500 }, { "epoch": 3.4, "eval_loss": 1.116951584815979, "eval_runtime": 220.4488, "eval_samples_per_second": 453.62, "eval_steps_per_second": 14.176, "step": 362500 }, { "epoch": 3.41, "learning_rate": 1.5935324036711023e-05, "loss": 1.2923, "step": 363000 }, { "epoch": 3.41, "eval_loss": 1.106821060180664, "eval_runtime": 220.4737, "eval_samples_per_second": 453.569, "eval_steps_per_second": 14.174, "step": 363000 }, { "epoch": 3.41, "learning_rate": 1.5888402995439277e-05, "loss": 1.2452, "step": 363500 }, { "epoch": 3.41, "eval_loss": 1.1107667684555054, "eval_runtime": 220.4687, "eval_samples_per_second": 453.579, "eval_steps_per_second": 14.174, "step": 363500 }, { "epoch": 3.42, "learning_rate": 1.5841481954167526e-05, "loss": 1.301, "step": 364000 }, { "epoch": 3.42, "eval_loss": 1.103507161140442, "eval_runtime": 220.5105, "eval_samples_per_second": 453.493, "eval_steps_per_second": 14.172, "step": 364000 }, { "epoch": 3.42, "learning_rate": 1.579456091289578e-05, "loss": 1.2933, "step": 364500 }, { "epoch": 3.42, "eval_loss": 1.1143946647644043, "eval_runtime": 220.552, "eval_samples_per_second": 453.408, "eval_steps_per_second": 14.169, "step": 364500 }, { "epoch": 3.43, "learning_rate": 1.5747639871624033e-05, "loss": 1.2676, "step": 365000 }, { "epoch": 3.43, "eval_loss": 1.1107447147369385, "eval_runtime": 220.5769, "eval_samples_per_second": 453.357, "eval_steps_per_second": 14.167, "step": 365000 }, { "epoch": 3.43, "learning_rate": 1.5700718830352283e-05, "loss": 1.2726, "step": 365500 }, { "epoch": 3.43, "eval_loss": 1.109850525856018, "eval_runtime": 220.6362, "eval_samples_per_second": 453.235, "eval_steps_per_second": 14.164, "step": 365500 }, { "epoch": 3.43, "learning_rate": 1.5653797789080536e-05, "loss": 1.2798, "step": 366000 }, { "epoch": 3.43, "eval_loss": 1.1126877069473267, "eval_runtime": 220.6282, "eval_samples_per_second": 453.251, "eval_steps_per_second": 14.164, "step": 366000 }, { "epoch": 3.44, "learning_rate": 1.560687674780879e-05, "loss": 1.297, "step": 366500 }, { "epoch": 3.44, "eval_loss": 1.1066131591796875, "eval_runtime": 220.6565, "eval_samples_per_second": 453.193, "eval_steps_per_second": 14.162, "step": 366500 }, { "epoch": 3.44, "learning_rate": 1.555995570653704e-05, "loss": 1.2886, "step": 367000 }, { "epoch": 3.44, "eval_loss": 1.1080513000488281, "eval_runtime": 220.6411, "eval_samples_per_second": 453.225, "eval_steps_per_second": 14.163, "step": 367000 }, { "epoch": 3.45, "learning_rate": 1.5513034665265293e-05, "loss": 1.294, "step": 367500 }, { "epoch": 3.45, "eval_loss": 1.109612226486206, "eval_runtime": 220.6484, "eval_samples_per_second": 453.21, "eval_steps_per_second": 14.163, "step": 367500 }, { "epoch": 3.45, "learning_rate": 1.5466113623993546e-05, "loss": 1.2738, "step": 368000 }, { "epoch": 3.45, "eval_loss": 1.099606990814209, "eval_runtime": 220.7045, "eval_samples_per_second": 453.095, "eval_steps_per_second": 14.159, "step": 368000 }, { "epoch": 3.46, "learning_rate": 1.54191925827218e-05, "loss": 1.2877, "step": 368500 }, { "epoch": 3.46, "eval_loss": 1.106638789176941, "eval_runtime": 220.7202, "eval_samples_per_second": 453.062, "eval_steps_per_second": 14.158, "step": 368500 }, { "epoch": 3.46, "learning_rate": 1.537227154145005e-05, "loss": 1.276, "step": 369000 }, { "epoch": 3.46, "eval_loss": 1.1104212999343872, "eval_runtime": 220.7131, "eval_samples_per_second": 453.077, "eval_steps_per_second": 14.159, "step": 369000 }, { "epoch": 3.47, "learning_rate": 1.5325350500178302e-05, "loss": 1.2741, "step": 369500 }, { "epoch": 3.47, "eval_loss": 1.1078869104385376, "eval_runtime": 220.7191, "eval_samples_per_second": 453.065, "eval_steps_per_second": 14.158, "step": 369500 }, { "epoch": 3.47, "learning_rate": 1.5278429458906556e-05, "loss": 1.2641, "step": 370000 }, { "epoch": 3.47, "eval_loss": 1.1033143997192383, "eval_runtime": 220.9613, "eval_samples_per_second": 452.568, "eval_steps_per_second": 14.143, "step": 370000 }, { "epoch": 3.48, "learning_rate": 1.5231508417634804e-05, "loss": 1.2774, "step": 370500 }, { "epoch": 3.48, "eval_loss": 1.1071778535842896, "eval_runtime": 220.9875, "eval_samples_per_second": 452.514, "eval_steps_per_second": 14.141, "step": 370500 }, { "epoch": 3.48, "learning_rate": 1.5184587376363057e-05, "loss": 1.2555, "step": 371000 }, { "epoch": 3.48, "eval_loss": 1.1127955913543701, "eval_runtime": 220.9818, "eval_samples_per_second": 452.526, "eval_steps_per_second": 14.141, "step": 371000 }, { "epoch": 3.49, "learning_rate": 1.5137666335091307e-05, "loss": 1.2704, "step": 371500 }, { "epoch": 3.49, "eval_loss": 1.1075456142425537, "eval_runtime": 220.945, "eval_samples_per_second": 452.601, "eval_steps_per_second": 14.144, "step": 371500 }, { "epoch": 3.49, "learning_rate": 1.509074529381956e-05, "loss": 1.273, "step": 372000 }, { "epoch": 3.49, "eval_loss": 1.0989867448806763, "eval_runtime": 220.9281, "eval_samples_per_second": 452.636, "eval_steps_per_second": 14.145, "step": 372000 }, { "epoch": 3.5, "learning_rate": 1.5043824252547814e-05, "loss": 1.2788, "step": 372500 }, { "epoch": 3.5, "eval_loss": 1.107438087463379, "eval_runtime": 220.898, "eval_samples_per_second": 452.698, "eval_steps_per_second": 14.147, "step": 372500 }, { "epoch": 3.5, "learning_rate": 1.4996903211276064e-05, "loss": 1.2729, "step": 373000 }, { "epoch": 3.5, "eval_loss": 1.104393720626831, "eval_runtime": 220.8922, "eval_samples_per_second": 452.71, "eval_steps_per_second": 14.147, "step": 373000 }, { "epoch": 3.51, "learning_rate": 1.4949982170004317e-05, "loss": 1.261, "step": 373500 }, { "epoch": 3.51, "eval_loss": 1.1048295497894287, "eval_runtime": 220.9264, "eval_samples_per_second": 452.639, "eval_steps_per_second": 14.145, "step": 373500 }, { "epoch": 3.51, "learning_rate": 1.490306112873257e-05, "loss": 1.2739, "step": 374000 }, { "epoch": 3.51, "eval_loss": 1.1010278463363647, "eval_runtime": 220.921, "eval_samples_per_second": 452.651, "eval_steps_per_second": 14.145, "step": 374000 }, { "epoch": 3.51, "learning_rate": 1.4856140087460822e-05, "loss": 1.288, "step": 374500 }, { "epoch": 3.51, "eval_loss": 1.1028172969818115, "eval_runtime": 220.9673, "eval_samples_per_second": 452.556, "eval_steps_per_second": 14.142, "step": 374500 }, { "epoch": 3.52, "learning_rate": 1.4809219046189073e-05, "loss": 1.2669, "step": 375000 }, { "epoch": 3.52, "eval_loss": 1.1051478385925293, "eval_runtime": 221.0246, "eval_samples_per_second": 452.438, "eval_steps_per_second": 14.139, "step": 375000 }, { "epoch": 3.52, "learning_rate": 1.4762298004917327e-05, "loss": 1.2715, "step": 375500 }, { "epoch": 3.52, "eval_loss": 1.1000804901123047, "eval_runtime": 221.0676, "eval_samples_per_second": 452.35, "eval_steps_per_second": 14.136, "step": 375500 }, { "epoch": 3.53, "learning_rate": 1.4715376963645578e-05, "loss": 1.2728, "step": 376000 }, { "epoch": 3.53, "eval_loss": 1.1016603708267212, "eval_runtime": 221.0626, "eval_samples_per_second": 452.361, "eval_steps_per_second": 14.136, "step": 376000 }, { "epoch": 3.53, "learning_rate": 1.466845592237383e-05, "loss": 1.2625, "step": 376500 }, { "epoch": 3.53, "eval_loss": 1.0998036861419678, "eval_runtime": 221.0903, "eval_samples_per_second": 452.304, "eval_steps_per_second": 14.134, "step": 376500 }, { "epoch": 3.54, "learning_rate": 1.4621534881102081e-05, "loss": 1.2652, "step": 377000 }, { "epoch": 3.54, "eval_loss": 1.1029491424560547, "eval_runtime": 221.0807, "eval_samples_per_second": 452.324, "eval_steps_per_second": 14.135, "step": 377000 }, { "epoch": 3.54, "learning_rate": 1.4574613839830335e-05, "loss": 1.2623, "step": 377500 }, { "epoch": 3.54, "eval_loss": 1.103481411933899, "eval_runtime": 221.1214, "eval_samples_per_second": 452.24, "eval_steps_per_second": 14.133, "step": 377500 }, { "epoch": 3.55, "learning_rate": 1.4527692798558586e-05, "loss": 1.2741, "step": 378000 }, { "epoch": 3.55, "eval_loss": 1.099875569343567, "eval_runtime": 221.1227, "eval_samples_per_second": 452.238, "eval_steps_per_second": 14.132, "step": 378000 }, { "epoch": 3.55, "learning_rate": 1.4480771757286838e-05, "loss": 1.2711, "step": 378500 }, { "epoch": 3.55, "eval_loss": 1.1044048070907593, "eval_runtime": 221.595, "eval_samples_per_second": 451.274, "eval_steps_per_second": 14.102, "step": 378500 }, { "epoch": 3.56, "learning_rate": 1.4433850716015091e-05, "loss": 1.2755, "step": 379000 }, { "epoch": 3.56, "eval_loss": 1.099456548690796, "eval_runtime": 221.7605, "eval_samples_per_second": 450.937, "eval_steps_per_second": 14.092, "step": 379000 }, { "epoch": 3.56, "learning_rate": 1.4386929674743343e-05, "loss": 1.277, "step": 379500 }, { "epoch": 3.56, "eval_loss": 1.0986175537109375, "eval_runtime": 221.6625, "eval_samples_per_second": 451.136, "eval_steps_per_second": 14.098, "step": 379500 }, { "epoch": 3.57, "learning_rate": 1.4340008633471594e-05, "loss": 1.2927, "step": 380000 }, { "epoch": 3.57, "eval_loss": 1.0979562997817993, "eval_runtime": 221.6264, "eval_samples_per_second": 451.21, "eval_steps_per_second": 14.1, "step": 380000 }, { "epoch": 3.57, "learning_rate": 1.4293087592199848e-05, "loss": 1.2719, "step": 380500 }, { "epoch": 3.57, "eval_loss": 1.0883769989013672, "eval_runtime": 221.2672, "eval_samples_per_second": 451.942, "eval_steps_per_second": 14.123, "step": 380500 }, { "epoch": 3.58, "learning_rate": 1.4246166550928101e-05, "loss": 1.2681, "step": 381000 }, { "epoch": 3.58, "eval_loss": 1.0984450578689575, "eval_runtime": 221.3116, "eval_samples_per_second": 451.852, "eval_steps_per_second": 14.12, "step": 381000 }, { "epoch": 3.58, "learning_rate": 1.419924550965635e-05, "loss": 1.2686, "step": 381500 }, { "epoch": 3.58, "eval_loss": 1.0925695896148682, "eval_runtime": 221.3252, "eval_samples_per_second": 451.824, "eval_steps_per_second": 14.119, "step": 381500 }, { "epoch": 3.58, "learning_rate": 1.4152324468384604e-05, "loss": 1.2669, "step": 382000 }, { "epoch": 3.58, "eval_loss": 1.1006863117218018, "eval_runtime": 221.3717, "eval_samples_per_second": 451.729, "eval_steps_per_second": 14.117, "step": 382000 }, { "epoch": 3.59, "learning_rate": 1.4105403427112854e-05, "loss": 1.2682, "step": 382500 }, { "epoch": 3.59, "eval_loss": 1.0927406549453735, "eval_runtime": 221.3896, "eval_samples_per_second": 451.692, "eval_steps_per_second": 14.115, "step": 382500 }, { "epoch": 3.59, "learning_rate": 1.4058482385841107e-05, "loss": 1.2713, "step": 383000 }, { "epoch": 3.59, "eval_loss": 1.0991029739379883, "eval_runtime": 221.3668, "eval_samples_per_second": 451.739, "eval_steps_per_second": 14.117, "step": 383000 }, { "epoch": 3.6, "learning_rate": 1.401156134456936e-05, "loss": 1.2405, "step": 383500 }, { "epoch": 3.6, "eval_loss": 1.1023343801498413, "eval_runtime": 221.3768, "eval_samples_per_second": 451.718, "eval_steps_per_second": 14.116, "step": 383500 }, { "epoch": 3.6, "learning_rate": 1.396464030329761e-05, "loss": 1.2564, "step": 384000 }, { "epoch": 3.6, "eval_loss": 1.0970436334609985, "eval_runtime": 221.4221, "eval_samples_per_second": 451.626, "eval_steps_per_second": 14.113, "step": 384000 }, { "epoch": 3.61, "learning_rate": 1.3917719262025864e-05, "loss": 1.2774, "step": 384500 }, { "epoch": 3.61, "eval_loss": 1.0976711511611938, "eval_runtime": 221.4388, "eval_samples_per_second": 451.592, "eval_steps_per_second": 14.112, "step": 384500 }, { "epoch": 3.61, "learning_rate": 1.3870798220754117e-05, "loss": 1.2732, "step": 385000 }, { "epoch": 3.61, "eval_loss": 1.091884732246399, "eval_runtime": 221.434, "eval_samples_per_second": 451.602, "eval_steps_per_second": 14.113, "step": 385000 }, { "epoch": 3.62, "learning_rate": 1.3823877179482367e-05, "loss": 1.2868, "step": 385500 }, { "epoch": 3.62, "eval_loss": 1.0924078226089478, "eval_runtime": 221.4826, "eval_samples_per_second": 451.503, "eval_steps_per_second": 14.109, "step": 385500 }, { "epoch": 3.62, "learning_rate": 1.377695613821062e-05, "loss": 1.2648, "step": 386000 }, { "epoch": 3.62, "eval_loss": 1.0871402025222778, "eval_runtime": 221.6939, "eval_samples_per_second": 451.072, "eval_steps_per_second": 14.096, "step": 386000 }, { "epoch": 3.63, "learning_rate": 1.3730035096938873e-05, "loss": 1.238, "step": 386500 }, { "epoch": 3.63, "eval_loss": 1.0820244550704956, "eval_runtime": 221.7091, "eval_samples_per_second": 451.042, "eval_steps_per_second": 14.095, "step": 386500 }, { "epoch": 3.63, "learning_rate": 1.3683114055667123e-05, "loss": 1.261, "step": 387000 }, { "epoch": 3.63, "eval_loss": 1.0901474952697754, "eval_runtime": 221.6939, "eval_samples_per_second": 451.072, "eval_steps_per_second": 14.096, "step": 387000 }, { "epoch": 3.64, "learning_rate": 1.3636193014395377e-05, "loss": 1.2589, "step": 387500 }, { "epoch": 3.64, "eval_loss": 1.0906635522842407, "eval_runtime": 221.6797, "eval_samples_per_second": 451.101, "eval_steps_per_second": 14.097, "step": 387500 }, { "epoch": 3.64, "learning_rate": 1.358927197312363e-05, "loss": 1.271, "step": 388000 }, { "epoch": 3.64, "eval_loss": 1.0888662338256836, "eval_runtime": 221.5473, "eval_samples_per_second": 451.371, "eval_steps_per_second": 14.105, "step": 388000 }, { "epoch": 3.65, "learning_rate": 1.354235093185188e-05, "loss": 1.261, "step": 388500 }, { "epoch": 3.65, "eval_loss": 1.0955654382705688, "eval_runtime": 221.4943, "eval_samples_per_second": 451.479, "eval_steps_per_second": 14.109, "step": 388500 }, { "epoch": 3.65, "learning_rate": 1.3495429890580133e-05, "loss": 1.2545, "step": 389000 }, { "epoch": 3.65, "eval_loss": 1.096941590309143, "eval_runtime": 221.5197, "eval_samples_per_second": 451.427, "eval_steps_per_second": 14.107, "step": 389000 }, { "epoch": 3.66, "learning_rate": 1.3448508849308383e-05, "loss": 1.2533, "step": 389500 }, { "epoch": 3.66, "eval_loss": 1.0853080749511719, "eval_runtime": 221.5678, "eval_samples_per_second": 451.329, "eval_steps_per_second": 14.104, "step": 389500 }, { "epoch": 3.66, "learning_rate": 1.3401587808036636e-05, "loss": 1.2683, "step": 390000 }, { "epoch": 3.66, "eval_loss": 1.0878088474273682, "eval_runtime": 221.5397, "eval_samples_per_second": 451.386, "eval_steps_per_second": 14.106, "step": 390000 }, { "epoch": 3.66, "learning_rate": 1.335466676676489e-05, "loss": 1.2485, "step": 390500 }, { "epoch": 3.66, "eval_loss": 1.0916895866394043, "eval_runtime": 221.4993, "eval_samples_per_second": 451.469, "eval_steps_per_second": 14.108, "step": 390500 }, { "epoch": 3.67, "learning_rate": 1.330774572549314e-05, "loss": 1.2733, "step": 391000 }, { "epoch": 3.67, "eval_loss": 1.085897445678711, "eval_runtime": 221.5237, "eval_samples_per_second": 451.419, "eval_steps_per_second": 14.107, "step": 391000 }, { "epoch": 3.67, "learning_rate": 1.3260824684221393e-05, "loss": 1.269, "step": 391500 }, { "epoch": 3.67, "eval_loss": 1.0844569206237793, "eval_runtime": 221.5194, "eval_samples_per_second": 451.428, "eval_steps_per_second": 14.107, "step": 391500 }, { "epoch": 3.68, "learning_rate": 1.3213903642949646e-05, "loss": 1.2515, "step": 392000 }, { "epoch": 3.68, "eval_loss": 1.0949499607086182, "eval_runtime": 221.5302, "eval_samples_per_second": 451.406, "eval_steps_per_second": 14.106, "step": 392000 }, { "epoch": 3.68, "learning_rate": 1.3166982601677896e-05, "loss": 1.2674, "step": 392500 }, { "epoch": 3.68, "eval_loss": 1.0817222595214844, "eval_runtime": 221.5421, "eval_samples_per_second": 451.382, "eval_steps_per_second": 14.106, "step": 392500 }, { "epoch": 3.69, "learning_rate": 1.312006156040615e-05, "loss": 1.2747, "step": 393000 }, { "epoch": 3.69, "eval_loss": 1.0874230861663818, "eval_runtime": 221.4987, "eval_samples_per_second": 451.47, "eval_steps_per_second": 14.108, "step": 393000 }, { "epoch": 3.69, "learning_rate": 1.3073140519134402e-05, "loss": 1.2679, "step": 393500 }, { "epoch": 3.69, "eval_loss": 1.0829836130142212, "eval_runtime": 221.764, "eval_samples_per_second": 450.93, "eval_steps_per_second": 14.092, "step": 393500 }, { "epoch": 3.7, "learning_rate": 1.3026219477862652e-05, "loss": 1.2376, "step": 394000 }, { "epoch": 3.7, "eval_loss": 1.0813868045806885, "eval_runtime": 221.788, "eval_samples_per_second": 450.881, "eval_steps_per_second": 14.09, "step": 394000 }, { "epoch": 3.7, "learning_rate": 1.2979298436590906e-05, "loss": 1.2527, "step": 394500 }, { "epoch": 3.7, "eval_loss": 1.081449270248413, "eval_runtime": 221.7504, "eval_samples_per_second": 450.957, "eval_steps_per_second": 14.092, "step": 394500 }, { "epoch": 3.71, "learning_rate": 1.2932377395319156e-05, "loss": 1.2559, "step": 395000 }, { "epoch": 3.71, "eval_loss": 1.0823462009429932, "eval_runtime": 221.6871, "eval_samples_per_second": 451.086, "eval_steps_per_second": 14.096, "step": 395000 }, { "epoch": 3.71, "learning_rate": 1.2885456354047409e-05, "loss": 1.2265, "step": 395500 }, { "epoch": 3.71, "eval_loss": 1.085395097732544, "eval_runtime": 221.4734, "eval_samples_per_second": 451.522, "eval_steps_per_second": 14.11, "step": 395500 }, { "epoch": 3.72, "learning_rate": 1.2838535312775662e-05, "loss": 1.2799, "step": 396000 }, { "epoch": 3.72, "eval_loss": 1.0858019590377808, "eval_runtime": 221.5008, "eval_samples_per_second": 451.466, "eval_steps_per_second": 14.108, "step": 396000 }, { "epoch": 3.72, "learning_rate": 1.2791614271503912e-05, "loss": 1.2449, "step": 396500 }, { "epoch": 3.72, "eval_loss": 1.0877788066864014, "eval_runtime": 221.5121, "eval_samples_per_second": 451.443, "eval_steps_per_second": 14.108, "step": 396500 }, { "epoch": 3.73, "learning_rate": 1.2744693230232165e-05, "loss": 1.2451, "step": 397000 }, { "epoch": 3.73, "eval_loss": 1.0856256484985352, "eval_runtime": 221.5418, "eval_samples_per_second": 451.382, "eval_steps_per_second": 14.106, "step": 397000 }, { "epoch": 3.73, "learning_rate": 1.2697772188960419e-05, "loss": 1.2292, "step": 397500 }, { "epoch": 3.73, "eval_loss": 1.080007791519165, "eval_runtime": 221.5442, "eval_samples_per_second": 451.377, "eval_steps_per_second": 14.106, "step": 397500 }, { "epoch": 3.73, "learning_rate": 1.2650851147688668e-05, "loss": 1.2452, "step": 398000 }, { "epoch": 3.73, "eval_loss": 1.0801745653152466, "eval_runtime": 221.5032, "eval_samples_per_second": 451.461, "eval_steps_per_second": 14.108, "step": 398000 }, { "epoch": 3.74, "learning_rate": 1.2603930106416922e-05, "loss": 1.2385, "step": 398500 }, { "epoch": 3.74, "eval_loss": 1.079880714416504, "eval_runtime": 221.5406, "eval_samples_per_second": 451.385, "eval_steps_per_second": 14.106, "step": 398500 }, { "epoch": 3.74, "learning_rate": 1.2557009065145175e-05, "loss": 1.2576, "step": 399000 }, { "epoch": 3.74, "eval_loss": 1.0792741775512695, "eval_runtime": 221.5278, "eval_samples_per_second": 451.411, "eval_steps_per_second": 14.107, "step": 399000 }, { "epoch": 3.75, "learning_rate": 1.2510088023873427e-05, "loss": 1.2567, "step": 399500 }, { "epoch": 3.75, "eval_loss": 1.0734611749649048, "eval_runtime": 221.555, "eval_samples_per_second": 451.355, "eval_steps_per_second": 14.105, "step": 399500 }, { "epoch": 3.75, "learning_rate": 1.2463166982601678e-05, "loss": 1.2384, "step": 400000 }, { "epoch": 3.75, "eval_loss": 1.0803865194320679, "eval_runtime": 221.5706, "eval_samples_per_second": 451.324, "eval_steps_per_second": 14.104, "step": 400000 }, { "epoch": 3.76, "learning_rate": 1.241624594132993e-05, "loss": 1.2782, "step": 400500 }, { "epoch": 3.76, "eval_loss": 1.085302472114563, "eval_runtime": 221.6737, "eval_samples_per_second": 451.113, "eval_steps_per_second": 14.097, "step": 400500 }, { "epoch": 3.76, "learning_rate": 1.2369324900058183e-05, "loss": 1.2418, "step": 401000 }, { "epoch": 3.76, "eval_loss": 1.0813859701156616, "eval_runtime": 221.7597, "eval_samples_per_second": 450.938, "eval_steps_per_second": 14.092, "step": 401000 }, { "epoch": 3.77, "learning_rate": 1.2322403858786435e-05, "loss": 1.2467, "step": 401500 }, { "epoch": 3.77, "eval_loss": 1.0765790939331055, "eval_runtime": 221.6965, "eval_samples_per_second": 451.067, "eval_steps_per_second": 14.096, "step": 401500 }, { "epoch": 3.77, "learning_rate": 1.2275482817514688e-05, "loss": 1.2557, "step": 402000 }, { "epoch": 3.77, "eval_loss": 1.074906587600708, "eval_runtime": 221.6682, "eval_samples_per_second": 451.125, "eval_steps_per_second": 14.098, "step": 402000 }, { "epoch": 3.78, "learning_rate": 1.222856177624294e-05, "loss": 1.2417, "step": 402500 }, { "epoch": 3.78, "eval_loss": 1.0793219804763794, "eval_runtime": 221.5416, "eval_samples_per_second": 451.383, "eval_steps_per_second": 14.106, "step": 402500 }, { "epoch": 3.78, "learning_rate": 1.2181640734971191e-05, "loss": 1.2544, "step": 403000 }, { "epoch": 3.78, "eval_loss": 1.0767314434051514, "eval_runtime": 221.4973, "eval_samples_per_second": 451.473, "eval_steps_per_second": 14.109, "step": 403000 }, { "epoch": 3.79, "learning_rate": 1.2134719693699444e-05, "loss": 1.2453, "step": 403500 }, { "epoch": 3.79, "eval_loss": 1.0776728391647339, "eval_runtime": 221.5112, "eval_samples_per_second": 451.444, "eval_steps_per_second": 14.108, "step": 403500 }, { "epoch": 3.79, "learning_rate": 1.2087798652427696e-05, "loss": 1.2508, "step": 404000 }, { "epoch": 3.79, "eval_loss": 1.0749331712722778, "eval_runtime": 221.5304, "eval_samples_per_second": 451.405, "eval_steps_per_second": 14.106, "step": 404000 }, { "epoch": 3.8, "learning_rate": 1.2040877611155948e-05, "loss": 1.2388, "step": 404500 }, { "epoch": 3.8, "eval_loss": 1.079124093055725, "eval_runtime": 221.5211, "eval_samples_per_second": 451.424, "eval_steps_per_second": 14.107, "step": 404500 }, { "epoch": 3.8, "learning_rate": 1.19939565698842e-05, "loss": 1.2614, "step": 405000 }, { "epoch": 3.8, "eval_loss": 1.0767608880996704, "eval_runtime": 221.5647, "eval_samples_per_second": 451.335, "eval_steps_per_second": 14.104, "step": 405000 }, { "epoch": 3.81, "learning_rate": 1.1947035528612453e-05, "loss": 1.2355, "step": 405500 }, { "epoch": 3.81, "eval_loss": 1.0779807567596436, "eval_runtime": 221.5326, "eval_samples_per_second": 451.401, "eval_steps_per_second": 14.106, "step": 405500 }, { "epoch": 3.81, "learning_rate": 1.1900114487340704e-05, "loss": 1.2481, "step": 406000 }, { "epoch": 3.81, "eval_loss": 1.076469898223877, "eval_runtime": 221.5584, "eval_samples_per_second": 451.348, "eval_steps_per_second": 14.105, "step": 406000 }, { "epoch": 3.81, "learning_rate": 1.1853193446068956e-05, "loss": 1.2382, "step": 406500 }, { "epoch": 3.81, "eval_loss": 1.0811249017715454, "eval_runtime": 221.5716, "eval_samples_per_second": 451.321, "eval_steps_per_second": 14.104, "step": 406500 }, { "epoch": 3.82, "learning_rate": 1.1806272404797209e-05, "loss": 1.2331, "step": 407000 }, { "epoch": 3.82, "eval_loss": 1.0766884088516235, "eval_runtime": 221.5762, "eval_samples_per_second": 451.312, "eval_steps_per_second": 14.104, "step": 407000 }, { "epoch": 3.82, "learning_rate": 1.175935136352546e-05, "loss": 1.235, "step": 407500 }, { "epoch": 3.82, "eval_loss": 1.0687439441680908, "eval_runtime": 221.5531, "eval_samples_per_second": 451.359, "eval_steps_per_second": 14.105, "step": 407500 }, { "epoch": 3.83, "learning_rate": 1.1712430322253712e-05, "loss": 1.2506, "step": 408000 }, { "epoch": 3.83, "eval_loss": 1.0767359733581543, "eval_runtime": 221.7729, "eval_samples_per_second": 450.912, "eval_steps_per_second": 14.091, "step": 408000 }, { "epoch": 3.83, "learning_rate": 1.1665509280981964e-05, "loss": 1.2455, "step": 408500 }, { "epoch": 3.83, "eval_loss": 1.0734050273895264, "eval_runtime": 221.7206, "eval_samples_per_second": 451.018, "eval_steps_per_second": 14.094, "step": 408500 }, { "epoch": 3.84, "learning_rate": 1.1618588239710217e-05, "loss": 1.235, "step": 409000 }, { "epoch": 3.84, "eval_loss": 1.076144814491272, "eval_runtime": 221.6417, "eval_samples_per_second": 451.179, "eval_steps_per_second": 14.099, "step": 409000 }, { "epoch": 3.84, "learning_rate": 1.1571667198438469e-05, "loss": 1.227, "step": 409500 }, { "epoch": 3.84, "eval_loss": 1.072109341621399, "eval_runtime": 221.5766, "eval_samples_per_second": 451.311, "eval_steps_per_second": 14.103, "step": 409500 }, { "epoch": 3.85, "learning_rate": 1.152474615716672e-05, "loss": 1.2566, "step": 410000 }, { "epoch": 3.85, "eval_loss": 1.0735670328140259, "eval_runtime": 221.4239, "eval_samples_per_second": 451.623, "eval_steps_per_second": 14.113, "step": 410000 }, { "epoch": 3.85, "learning_rate": 1.1477825115894974e-05, "loss": 1.2523, "step": 410500 }, { "epoch": 3.85, "eval_loss": 1.0737024545669556, "eval_runtime": 221.4176, "eval_samples_per_second": 451.635, "eval_steps_per_second": 14.114, "step": 410500 }, { "epoch": 3.86, "learning_rate": 1.1430904074623225e-05, "loss": 1.239, "step": 411000 }, { "epoch": 3.86, "eval_loss": 1.075139045715332, "eval_runtime": 221.4292, "eval_samples_per_second": 451.612, "eval_steps_per_second": 14.113, "step": 411000 }, { "epoch": 3.86, "learning_rate": 1.1383983033351477e-05, "loss": 1.2632, "step": 411500 }, { "epoch": 3.86, "eval_loss": 1.0686564445495605, "eval_runtime": 221.4503, "eval_samples_per_second": 451.569, "eval_steps_per_second": 14.112, "step": 411500 }, { "epoch": 3.87, "learning_rate": 1.1337061992079728e-05, "loss": 1.2299, "step": 412000 }, { "epoch": 3.87, "eval_loss": 1.0703197717666626, "eval_runtime": 221.4314, "eval_samples_per_second": 451.607, "eval_steps_per_second": 14.113, "step": 412000 }, { "epoch": 3.87, "learning_rate": 1.1290140950807982e-05, "loss": 1.2451, "step": 412500 }, { "epoch": 3.87, "eval_loss": 1.0710246562957764, "eval_runtime": 221.4462, "eval_samples_per_second": 451.577, "eval_steps_per_second": 14.112, "step": 412500 }, { "epoch": 3.88, "learning_rate": 1.1243219909536233e-05, "loss": 1.2372, "step": 413000 }, { "epoch": 3.88, "eval_loss": 1.0705777406692505, "eval_runtime": 221.433, "eval_samples_per_second": 451.604, "eval_steps_per_second": 14.113, "step": 413000 }, { "epoch": 3.88, "learning_rate": 1.1196298868264485e-05, "loss": 1.2282, "step": 413500 }, { "epoch": 3.88, "eval_loss": 1.0712275505065918, "eval_runtime": 221.4443, "eval_samples_per_second": 451.581, "eval_steps_per_second": 14.112, "step": 413500 }, { "epoch": 3.89, "learning_rate": 1.1149377826992736e-05, "loss": 1.2334, "step": 414000 }, { "epoch": 3.89, "eval_loss": 1.072120189666748, "eval_runtime": 221.422, "eval_samples_per_second": 451.626, "eval_steps_per_second": 14.113, "step": 414000 }, { "epoch": 3.89, "learning_rate": 1.110245678572099e-05, "loss": 1.23, "step": 414500 }, { "epoch": 3.89, "eval_loss": 1.0694152116775513, "eval_runtime": 221.4396, "eval_samples_per_second": 451.59, "eval_steps_per_second": 14.112, "step": 414500 }, { "epoch": 3.89, "learning_rate": 1.1055535744449241e-05, "loss": 1.2466, "step": 415000 }, { "epoch": 3.89, "eval_loss": 1.0689135789871216, "eval_runtime": 221.4928, "eval_samples_per_second": 451.482, "eval_steps_per_second": 14.109, "step": 415000 }, { "epoch": 3.9, "learning_rate": 1.1008614703177493e-05, "loss": 1.2166, "step": 415500 }, { "epoch": 3.9, "eval_loss": 1.0654420852661133, "eval_runtime": 221.667, "eval_samples_per_second": 451.127, "eval_steps_per_second": 14.098, "step": 415500 }, { "epoch": 3.9, "learning_rate": 1.0961693661905746e-05, "loss": 1.242, "step": 416000 }, { "epoch": 3.9, "eval_loss": 1.060294270515442, "eval_runtime": 221.5802, "eval_samples_per_second": 451.304, "eval_steps_per_second": 14.103, "step": 416000 }, { "epoch": 3.91, "learning_rate": 1.0914772620633998e-05, "loss": 1.2328, "step": 416500 }, { "epoch": 3.91, "eval_loss": 1.0750603675842285, "eval_runtime": 221.529, "eval_samples_per_second": 451.408, "eval_steps_per_second": 14.107, "step": 416500 }, { "epoch": 3.91, "learning_rate": 1.086785157936225e-05, "loss": 1.2305, "step": 417000 }, { "epoch": 3.91, "eval_loss": 1.0709779262542725, "eval_runtime": 221.4321, "eval_samples_per_second": 451.606, "eval_steps_per_second": 14.113, "step": 417000 }, { "epoch": 3.92, "learning_rate": 1.0820930538090501e-05, "loss": 1.2583, "step": 417500 }, { "epoch": 3.92, "eval_loss": 1.069564700126648, "eval_runtime": 221.3517, "eval_samples_per_second": 451.77, "eval_steps_per_second": 14.118, "step": 417500 }, { "epoch": 3.92, "learning_rate": 1.0774009496818754e-05, "loss": 1.2281, "step": 418000 }, { "epoch": 3.92, "eval_loss": 1.068252444267273, "eval_runtime": 221.3357, "eval_samples_per_second": 451.802, "eval_steps_per_second": 14.119, "step": 418000 }, { "epoch": 3.93, "learning_rate": 1.0727088455547006e-05, "loss": 1.2311, "step": 418500 }, { "epoch": 3.93, "eval_loss": 1.0643572807312012, "eval_runtime": 221.2957, "eval_samples_per_second": 451.884, "eval_steps_per_second": 14.121, "step": 418500 }, { "epoch": 3.93, "learning_rate": 1.0680167414275257e-05, "loss": 1.2295, "step": 419000 }, { "epoch": 3.93, "eval_loss": 1.0733916759490967, "eval_runtime": 221.3151, "eval_samples_per_second": 451.844, "eval_steps_per_second": 14.12, "step": 419000 }, { "epoch": 3.94, "learning_rate": 1.063324637300351e-05, "loss": 1.2443, "step": 419500 }, { "epoch": 3.94, "eval_loss": 1.068743109703064, "eval_runtime": 221.3228, "eval_samples_per_second": 451.829, "eval_steps_per_second": 14.12, "step": 419500 }, { "epoch": 3.94, "learning_rate": 1.0586325331731762e-05, "loss": 1.2457, "step": 420000 }, { "epoch": 3.94, "eval_loss": 1.0654057264328003, "eval_runtime": 221.3124, "eval_samples_per_second": 451.85, "eval_steps_per_second": 14.12, "step": 420000 }, { "epoch": 3.95, "learning_rate": 1.0539404290460014e-05, "loss": 1.2431, "step": 420500 }, { "epoch": 3.95, "eval_loss": 1.0636610984802246, "eval_runtime": 221.147, "eval_samples_per_second": 452.188, "eval_steps_per_second": 14.131, "step": 420500 }, { "epoch": 3.95, "learning_rate": 1.0492483249188265e-05, "loss": 1.2248, "step": 421000 }, { "epoch": 3.95, "eval_loss": 1.0696020126342773, "eval_runtime": 221.1217, "eval_samples_per_second": 452.24, "eval_steps_per_second": 14.132, "step": 421000 }, { "epoch": 3.96, "learning_rate": 1.0445562207916519e-05, "loss": 1.2361, "step": 421500 }, { "epoch": 3.96, "eval_loss": 1.0683060884475708, "eval_runtime": 221.1561, "eval_samples_per_second": 452.169, "eval_steps_per_second": 14.13, "step": 421500 }, { "epoch": 3.96, "learning_rate": 1.039864116664477e-05, "loss": 1.2502, "step": 422000 }, { "epoch": 3.96, "eval_loss": 1.0591408014297485, "eval_runtime": 221.1321, "eval_samples_per_second": 452.218, "eval_steps_per_second": 14.132, "step": 422000 }, { "epoch": 3.96, "learning_rate": 1.0351720125373022e-05, "loss": 1.2188, "step": 422500 }, { "epoch": 3.96, "eval_loss": 1.0668104887008667, "eval_runtime": 221.1228, "eval_samples_per_second": 452.237, "eval_steps_per_second": 14.132, "step": 422500 }, { "epoch": 3.97, "learning_rate": 1.0304799084101273e-05, "loss": 1.2439, "step": 423000 }, { "epoch": 3.97, "eval_loss": 1.06576406955719, "eval_runtime": 221.2975, "eval_samples_per_second": 451.88, "eval_steps_per_second": 14.121, "step": 423000 }, { "epoch": 3.97, "learning_rate": 1.0257878042829527e-05, "loss": 1.242, "step": 423500 }, { "epoch": 3.97, "eval_loss": 1.0613994598388672, "eval_runtime": 221.2789, "eval_samples_per_second": 451.918, "eval_steps_per_second": 14.122, "step": 423500 }, { "epoch": 3.98, "learning_rate": 1.0210957001557778e-05, "loss": 1.2245, "step": 424000 }, { "epoch": 3.98, "eval_loss": 1.0643956661224365, "eval_runtime": 221.2185, "eval_samples_per_second": 452.042, "eval_steps_per_second": 14.126, "step": 424000 }, { "epoch": 3.98, "learning_rate": 1.0164035960286032e-05, "loss": 1.2288, "step": 424500 }, { "epoch": 3.98, "eval_loss": 1.0604496002197266, "eval_runtime": 221.1793, "eval_samples_per_second": 452.122, "eval_steps_per_second": 14.129, "step": 424500 }, { "epoch": 3.99, "learning_rate": 1.0117114919014283e-05, "loss": 1.2308, "step": 425000 }, { "epoch": 3.99, "eval_loss": 1.0600295066833496, "eval_runtime": 221.044, "eval_samples_per_second": 452.399, "eval_steps_per_second": 14.137, "step": 425000 }, { "epoch": 3.99, "learning_rate": 1.0070193877742535e-05, "loss": 1.2294, "step": 425500 }, { "epoch": 3.99, "eval_loss": 1.066137433052063, "eval_runtime": 220.9979, "eval_samples_per_second": 452.493, "eval_steps_per_second": 14.14, "step": 425500 }, { "epoch": 4.0, "learning_rate": 1.0023272836470788e-05, "loss": 1.2377, "step": 426000 }, { "epoch": 4.0, "eval_loss": 1.0587615966796875, "eval_runtime": 220.9922, "eval_samples_per_second": 452.505, "eval_steps_per_second": 14.141, "step": 426000 }, { "epoch": 4.0, "learning_rate": 9.97635179519904e-06, "loss": 1.219, "step": 426500 }, { "epoch": 4.0, "eval_loss": 1.0649616718292236, "eval_runtime": 221.0888, "eval_samples_per_second": 452.307, "eval_steps_per_second": 14.135, "step": 426500 }, { "epoch": 4.01, "learning_rate": 9.929430753927293e-06, "loss": 1.2135, "step": 427000 }, { "epoch": 4.01, "eval_loss": 1.0579185485839844, "eval_runtime": 221.0924, "eval_samples_per_second": 452.3, "eval_steps_per_second": 14.134, "step": 427000 }, { "epoch": 4.01, "learning_rate": 9.882509712655545e-06, "loss": 1.2194, "step": 427500 }, { "epoch": 4.01, "eval_loss": 1.0547699928283691, "eval_runtime": 221.1208, "eval_samples_per_second": 452.241, "eval_steps_per_second": 14.133, "step": 427500 }, { "epoch": 4.02, "learning_rate": 9.835588671383796e-06, "loss": 1.2386, "step": 428000 }, { "epoch": 4.02, "eval_loss": 1.0614423751831055, "eval_runtime": 221.0918, "eval_samples_per_second": 452.301, "eval_steps_per_second": 14.134, "step": 428000 }, { "epoch": 4.02, "learning_rate": 9.788667630112048e-06, "loss": 1.2259, "step": 428500 }, { "epoch": 4.02, "eval_loss": 1.0620990991592407, "eval_runtime": 221.0818, "eval_samples_per_second": 452.321, "eval_steps_per_second": 14.135, "step": 428500 }, { "epoch": 4.03, "learning_rate": 9.741746588840301e-06, "loss": 1.2263, "step": 429000 }, { "epoch": 4.03, "eval_loss": 1.0643956661224365, "eval_runtime": 221.0828, "eval_samples_per_second": 452.319, "eval_steps_per_second": 14.135, "step": 429000 }, { "epoch": 4.03, "learning_rate": 9.694825547568553e-06, "loss": 1.2305, "step": 429500 }, { "epoch": 4.03, "eval_loss": 1.0544147491455078, "eval_runtime": 221.1024, "eval_samples_per_second": 452.279, "eval_steps_per_second": 14.134, "step": 429500 }, { "epoch": 4.04, "learning_rate": 9.647904506296804e-06, "loss": 1.2101, "step": 430000 }, { "epoch": 4.04, "eval_loss": 1.0544720888137817, "eval_runtime": 221.1133, "eval_samples_per_second": 452.257, "eval_steps_per_second": 14.133, "step": 430000 }, { "epoch": 4.04, "learning_rate": 9.600983465025057e-06, "loss": 1.2334, "step": 430500 }, { "epoch": 4.04, "eval_loss": 1.0552457571029663, "eval_runtime": 220.9889, "eval_samples_per_second": 452.511, "eval_steps_per_second": 14.141, "step": 430500 }, { "epoch": 4.04, "learning_rate": 9.554062423753309e-06, "loss": 1.2204, "step": 431000 }, { "epoch": 4.04, "eval_loss": 1.0604522228240967, "eval_runtime": 221.1288, "eval_samples_per_second": 452.225, "eval_steps_per_second": 14.132, "step": 431000 }, { "epoch": 4.05, "learning_rate": 9.50714138248156e-06, "loss": 1.2375, "step": 431500 }, { "epoch": 4.05, "eval_loss": 1.0569729804992676, "eval_runtime": 221.1398, "eval_samples_per_second": 452.203, "eval_steps_per_second": 14.131, "step": 431500 }, { "epoch": 4.05, "learning_rate": 9.460220341209812e-06, "loss": 1.2491, "step": 432000 }, { "epoch": 4.05, "eval_loss": 1.052681803703308, "eval_runtime": 221.0809, "eval_samples_per_second": 452.323, "eval_steps_per_second": 14.135, "step": 432000 }, { "epoch": 4.06, "learning_rate": 9.413299299938066e-06, "loss": 1.2265, "step": 432500 }, { "epoch": 4.06, "eval_loss": 1.057190179824829, "eval_runtime": 221.0473, "eval_samples_per_second": 452.392, "eval_steps_per_second": 14.137, "step": 432500 }, { "epoch": 4.06, "learning_rate": 9.366378258666317e-06, "loss": 1.2213, "step": 433000 }, { "epoch": 4.06, "eval_loss": 1.0613055229187012, "eval_runtime": 220.9425, "eval_samples_per_second": 452.606, "eval_steps_per_second": 14.144, "step": 433000 }, { "epoch": 4.07, "learning_rate": 9.319457217394569e-06, "loss": 1.2242, "step": 433500 }, { "epoch": 4.07, "eval_loss": 1.054632306098938, "eval_runtime": 220.92, "eval_samples_per_second": 452.652, "eval_steps_per_second": 14.145, "step": 433500 }, { "epoch": 4.07, "learning_rate": 9.272536176122822e-06, "loss": 1.2335, "step": 434000 }, { "epoch": 4.07, "eval_loss": 1.0627985000610352, "eval_runtime": 220.9221, "eval_samples_per_second": 452.648, "eval_steps_per_second": 14.145, "step": 434000 }, { "epoch": 4.08, "learning_rate": 9.225615134851074e-06, "loss": 1.2306, "step": 434500 }, { "epoch": 4.08, "eval_loss": 1.058109164237976, "eval_runtime": 220.938, "eval_samples_per_second": 452.616, "eval_steps_per_second": 14.144, "step": 434500 }, { "epoch": 4.08, "learning_rate": 9.178694093579325e-06, "loss": 1.2317, "step": 435000 }, { "epoch": 4.08, "eval_loss": 1.060577392578125, "eval_runtime": 220.957, "eval_samples_per_second": 452.577, "eval_steps_per_second": 14.143, "step": 435000 }, { "epoch": 4.09, "learning_rate": 9.131773052307577e-06, "loss": 1.2207, "step": 435500 }, { "epoch": 4.09, "eval_loss": 1.0583726167678833, "eval_runtime": 220.9147, "eval_samples_per_second": 452.663, "eval_steps_per_second": 14.146, "step": 435500 }, { "epoch": 4.09, "learning_rate": 9.08485201103583e-06, "loss": 1.2118, "step": 436000 }, { "epoch": 4.09, "eval_loss": 1.0572487115859985, "eval_runtime": 220.9401, "eval_samples_per_second": 452.611, "eval_steps_per_second": 14.144, "step": 436000 }, { "epoch": 4.1, "learning_rate": 9.037930969764082e-06, "loss": 1.2033, "step": 436500 }, { "epoch": 4.1, "eval_loss": 1.0609925985336304, "eval_runtime": 220.9828, "eval_samples_per_second": 452.524, "eval_steps_per_second": 14.141, "step": 436500 }, { "epoch": 4.1, "learning_rate": 8.991009928492333e-06, "loss": 1.2224, "step": 437000 }, { "epoch": 4.1, "eval_loss": 1.0533952713012695, "eval_runtime": 220.933, "eval_samples_per_second": 452.626, "eval_steps_per_second": 14.145, "step": 437000 }, { "epoch": 4.11, "learning_rate": 8.944088887220585e-06, "loss": 1.223, "step": 437500 }, { "epoch": 4.11, "eval_loss": 1.0515124797821045, "eval_runtime": 220.9716, "eval_samples_per_second": 452.547, "eval_steps_per_second": 14.142, "step": 437500 }, { "epoch": 4.11, "learning_rate": 8.897167845948838e-06, "loss": 1.2215, "step": 438000 }, { "epoch": 4.11, "eval_loss": 1.0595310926437378, "eval_runtime": 220.9271, "eval_samples_per_second": 452.638, "eval_steps_per_second": 14.145, "step": 438000 }, { "epoch": 4.11, "learning_rate": 8.85024680467709e-06, "loss": 1.2241, "step": 438500 }, { "epoch": 4.11, "eval_loss": 1.055729627609253, "eval_runtime": 220.9211, "eval_samples_per_second": 452.65, "eval_steps_per_second": 14.145, "step": 438500 }, { "epoch": 4.12, "learning_rate": 8.803325763405341e-06, "loss": 1.1953, "step": 439000 }, { "epoch": 4.12, "eval_loss": 1.0478371381759644, "eval_runtime": 220.9454, "eval_samples_per_second": 452.6, "eval_steps_per_second": 14.144, "step": 439000 }, { "epoch": 4.12, "learning_rate": 8.756404722133595e-06, "loss": 1.2235, "step": 439500 }, { "epoch": 4.12, "eval_loss": 1.05348801612854, "eval_runtime": 221.0593, "eval_samples_per_second": 452.367, "eval_steps_per_second": 14.136, "step": 439500 }, { "epoch": 4.13, "learning_rate": 8.709483680861846e-06, "loss": 1.211, "step": 440000 }, { "epoch": 4.13, "eval_loss": 1.0495777130126953, "eval_runtime": 221.0334, "eval_samples_per_second": 452.42, "eval_steps_per_second": 14.138, "step": 440000 }, { "epoch": 4.13, "learning_rate": 8.662562639590098e-06, "loss": 1.2257, "step": 440500 }, { "epoch": 4.13, "eval_loss": 1.056633472442627, "eval_runtime": 220.9965, "eval_samples_per_second": 452.496, "eval_steps_per_second": 14.14, "step": 440500 }, { "epoch": 4.14, "learning_rate": 8.61564159831835e-06, "loss": 1.2171, "step": 441000 }, { "epoch": 4.14, "eval_loss": 1.0503853559494019, "eval_runtime": 220.9748, "eval_samples_per_second": 452.54, "eval_steps_per_second": 14.142, "step": 441000 }, { "epoch": 4.14, "learning_rate": 8.568720557046603e-06, "loss": 1.2105, "step": 441500 }, { "epoch": 4.14, "eval_loss": 1.0520578622817993, "eval_runtime": 220.8941, "eval_samples_per_second": 452.706, "eval_steps_per_second": 14.147, "step": 441500 }, { "epoch": 4.15, "learning_rate": 8.521799515774854e-06, "loss": 1.2045, "step": 442000 }, { "epoch": 4.15, "eval_loss": 1.053121566772461, "eval_runtime": 220.8544, "eval_samples_per_second": 452.787, "eval_steps_per_second": 14.15, "step": 442000 }, { "epoch": 4.15, "learning_rate": 8.474878474503106e-06, "loss": 1.205, "step": 442500 }, { "epoch": 4.15, "eval_loss": 1.0605696439743042, "eval_runtime": 220.8587, "eval_samples_per_second": 452.778, "eval_steps_per_second": 14.149, "step": 442500 }, { "epoch": 4.16, "learning_rate": 8.427957433231359e-06, "loss": 1.2145, "step": 443000 }, { "epoch": 4.16, "eval_loss": 1.0511351823806763, "eval_runtime": 220.8862, "eval_samples_per_second": 452.722, "eval_steps_per_second": 14.148, "step": 443000 }, { "epoch": 4.16, "learning_rate": 8.38103639195961e-06, "loss": 1.1973, "step": 443500 }, { "epoch": 4.16, "eval_loss": 1.0576379299163818, "eval_runtime": 220.8821, "eval_samples_per_second": 452.73, "eval_steps_per_second": 14.148, "step": 443500 }, { "epoch": 4.17, "learning_rate": 8.334115350687862e-06, "loss": 1.2061, "step": 444000 }, { "epoch": 4.17, "eval_loss": 1.0536248683929443, "eval_runtime": 220.8693, "eval_samples_per_second": 452.756, "eval_steps_per_second": 14.149, "step": 444000 }, { "epoch": 4.17, "learning_rate": 8.287194309416114e-06, "loss": 1.2241, "step": 444500 }, { "epoch": 4.17, "eval_loss": 1.0456198453903198, "eval_runtime": 220.876, "eval_samples_per_second": 452.743, "eval_steps_per_second": 14.148, "step": 444500 }, { "epoch": 4.18, "learning_rate": 8.240273268144367e-06, "loss": 1.1978, "step": 445000 }, { "epoch": 4.18, "eval_loss": 1.0541229248046875, "eval_runtime": 220.8941, "eval_samples_per_second": 452.706, "eval_steps_per_second": 14.147, "step": 445000 }, { "epoch": 4.18, "learning_rate": 8.193352226872619e-06, "loss": 1.2131, "step": 445500 }, { "epoch": 4.18, "eval_loss": 1.050695538520813, "eval_runtime": 220.8941, "eval_samples_per_second": 452.706, "eval_steps_per_second": 14.147, "step": 445500 }, { "epoch": 4.19, "learning_rate": 8.14643118560087e-06, "loss": 1.1941, "step": 446000 }, { "epoch": 4.19, "eval_loss": 1.0486228466033936, "eval_runtime": 220.8943, "eval_samples_per_second": 452.705, "eval_steps_per_second": 14.147, "step": 446000 }, { "epoch": 4.19, "learning_rate": 8.099510144329122e-06, "loss": 1.2137, "step": 446500 }, { "epoch": 4.19, "eval_loss": 1.050898790359497, "eval_runtime": 220.9099, "eval_samples_per_second": 452.673, "eval_steps_per_second": 14.146, "step": 446500 }, { "epoch": 4.19, "learning_rate": 8.052589103057375e-06, "loss": 1.212, "step": 447000 }, { "epoch": 4.19, "eval_loss": 1.052153468132019, "eval_runtime": 220.9042, "eval_samples_per_second": 452.685, "eval_steps_per_second": 14.146, "step": 447000 }, { "epoch": 4.2, "learning_rate": 8.005668061785627e-06, "loss": 1.2141, "step": 447500 }, { "epoch": 4.2, "eval_loss": 1.0504639148712158, "eval_runtime": 220.8838, "eval_samples_per_second": 452.727, "eval_steps_per_second": 14.148, "step": 447500 }, { "epoch": 4.2, "learning_rate": 7.958747020513878e-06, "loss": 1.2035, "step": 448000 }, { "epoch": 4.2, "eval_loss": 1.0515419244766235, "eval_runtime": 220.9254, "eval_samples_per_second": 452.642, "eval_steps_per_second": 14.145, "step": 448000 }, { "epoch": 4.21, "learning_rate": 7.911825979242132e-06, "loss": 1.2162, "step": 448500 }, { "epoch": 4.21, "eval_loss": 1.045407772064209, "eval_runtime": 221.0538, "eval_samples_per_second": 452.379, "eval_steps_per_second": 14.137, "step": 448500 }, { "epoch": 4.21, "learning_rate": 7.864904937970383e-06, "loss": 1.2062, "step": 449000 }, { "epoch": 4.21, "eval_loss": 1.0419821739196777, "eval_runtime": 221.0256, "eval_samples_per_second": 452.436, "eval_steps_per_second": 14.139, "step": 449000 }, { "epoch": 4.22, "learning_rate": 7.817983896698637e-06, "loss": 1.1996, "step": 449500 }, { "epoch": 4.22, "eval_loss": 1.0498727560043335, "eval_runtime": 220.9307, "eval_samples_per_second": 452.631, "eval_steps_per_second": 14.145, "step": 449500 }, { "epoch": 4.22, "learning_rate": 7.771062855426888e-06, "loss": 1.2194, "step": 450000 }, { "epoch": 4.22, "eval_loss": 1.0517551898956299, "eval_runtime": 220.9312, "eval_samples_per_second": 452.63, "eval_steps_per_second": 14.145, "step": 450000 }, { "epoch": 4.23, "learning_rate": 7.72414181415514e-06, "loss": 1.2062, "step": 450500 }, { "epoch": 4.23, "eval_loss": 1.0490150451660156, "eval_runtime": 220.7475, "eval_samples_per_second": 453.006, "eval_steps_per_second": 14.156, "step": 450500 }, { "epoch": 4.23, "learning_rate": 7.677220772883393e-06, "loss": 1.2046, "step": 451000 }, { "epoch": 4.23, "eval_loss": 1.0536123514175415, "eval_runtime": 220.7673, "eval_samples_per_second": 452.966, "eval_steps_per_second": 14.155, "step": 451000 }, { "epoch": 4.24, "learning_rate": 7.630299731611645e-06, "loss": 1.2012, "step": 451500 }, { "epoch": 4.24, "eval_loss": 1.0506072044372559, "eval_runtime": 220.7213, "eval_samples_per_second": 453.06, "eval_steps_per_second": 14.158, "step": 451500 }, { "epoch": 4.24, "learning_rate": 7.583378690339897e-06, "loss": 1.2161, "step": 452000 }, { "epoch": 4.24, "eval_loss": 1.0469845533370972, "eval_runtime": 220.7698, "eval_samples_per_second": 452.96, "eval_steps_per_second": 14.155, "step": 452000 }, { "epoch": 4.25, "learning_rate": 7.536457649068149e-06, "loss": 1.2054, "step": 452500 }, { "epoch": 4.25, "eval_loss": 1.0446325540542603, "eval_runtime": 220.7759, "eval_samples_per_second": 452.948, "eval_steps_per_second": 14.155, "step": 452500 }, { "epoch": 4.25, "learning_rate": 7.4895366077964e-06, "loss": 1.208, "step": 453000 }, { "epoch": 4.25, "eval_loss": 1.0471805334091187, "eval_runtime": 220.7642, "eval_samples_per_second": 452.972, "eval_steps_per_second": 14.155, "step": 453000 }, { "epoch": 4.26, "learning_rate": 7.442615566524652e-06, "loss": 1.1995, "step": 453500 }, { "epoch": 4.26, "eval_loss": 1.0422724485397339, "eval_runtime": 220.7863, "eval_samples_per_second": 452.927, "eval_steps_per_second": 14.154, "step": 453500 }, { "epoch": 4.26, "learning_rate": 7.395694525252905e-06, "loss": 1.2153, "step": 454000 }, { "epoch": 4.26, "eval_loss": 1.046839714050293, "eval_runtime": 220.7559, "eval_samples_per_second": 452.989, "eval_steps_per_second": 14.156, "step": 454000 }, { "epoch": 4.27, "learning_rate": 7.348773483981157e-06, "loss": 1.1999, "step": 454500 }, { "epoch": 4.27, "eval_loss": 1.0423718690872192, "eval_runtime": 220.7317, "eval_samples_per_second": 453.039, "eval_steps_per_second": 14.157, "step": 454500 }, { "epoch": 4.27, "learning_rate": 7.301852442709409e-06, "loss": 1.1919, "step": 455000 }, { "epoch": 4.27, "eval_loss": 1.040187120437622, "eval_runtime": 220.7387, "eval_samples_per_second": 453.024, "eval_steps_per_second": 14.157, "step": 455000 }, { "epoch": 4.27, "learning_rate": 7.254931401437661e-06, "loss": 1.1848, "step": 455500 }, { "epoch": 4.27, "eval_loss": 1.0394939184188843, "eval_runtime": 220.7554, "eval_samples_per_second": 452.99, "eval_steps_per_second": 14.156, "step": 455500 }, { "epoch": 4.28, "learning_rate": 7.208010360165913e-06, "loss": 1.2012, "step": 456000 }, { "epoch": 4.28, "eval_loss": 1.0482308864593506, "eval_runtime": 220.7304, "eval_samples_per_second": 453.041, "eval_steps_per_second": 14.158, "step": 456000 }, { "epoch": 4.28, "learning_rate": 7.161089318894166e-06, "loss": 1.2028, "step": 456500 }, { "epoch": 4.28, "eval_loss": 1.0419962406158447, "eval_runtime": 220.7448, "eval_samples_per_second": 453.012, "eval_steps_per_second": 14.157, "step": 456500 }, { "epoch": 4.29, "learning_rate": 7.114168277622417e-06, "loss": 1.212, "step": 457000 }, { "epoch": 4.29, "eval_loss": 1.0430151224136353, "eval_runtime": 220.7318, "eval_samples_per_second": 453.038, "eval_steps_per_second": 14.157, "step": 457000 }, { "epoch": 4.29, "learning_rate": 7.06724723635067e-06, "loss": 1.1949, "step": 457500 }, { "epoch": 4.29, "eval_loss": 1.047107219696045, "eval_runtime": 220.7582, "eval_samples_per_second": 452.984, "eval_steps_per_second": 14.156, "step": 457500 }, { "epoch": 4.3, "learning_rate": 7.020326195078922e-06, "loss": 1.2095, "step": 458000 }, { "epoch": 4.3, "eval_loss": 1.0399595499038696, "eval_runtime": 220.8799, "eval_samples_per_second": 452.735, "eval_steps_per_second": 14.148, "step": 458000 }, { "epoch": 4.3, "learning_rate": 6.973405153807174e-06, "loss": 1.2085, "step": 458500 }, { "epoch": 4.3, "eval_loss": 1.0479100942611694, "eval_runtime": 220.8879, "eval_samples_per_second": 452.718, "eval_steps_per_second": 14.147, "step": 458500 }, { "epoch": 4.31, "learning_rate": 6.926484112535425e-06, "loss": 1.1901, "step": 459000 }, { "epoch": 4.31, "eval_loss": 1.0417808294296265, "eval_runtime": 220.8257, "eval_samples_per_second": 452.846, "eval_steps_per_second": 14.151, "step": 459000 }, { "epoch": 4.31, "learning_rate": 6.8795630712636785e-06, "loss": 1.2034, "step": 459500 }, { "epoch": 4.31, "eval_loss": 1.0389989614486694, "eval_runtime": 220.7906, "eval_samples_per_second": 452.918, "eval_steps_per_second": 14.154, "step": 459500 }, { "epoch": 4.32, "learning_rate": 6.83264202999193e-06, "loss": 1.225, "step": 460000 }, { "epoch": 4.32, "eval_loss": 1.0430644750595093, "eval_runtime": 220.6771, "eval_samples_per_second": 453.151, "eval_steps_per_second": 14.161, "step": 460000 }, { "epoch": 4.32, "learning_rate": 6.785720988720182e-06, "loss": 1.1967, "step": 460500 }, { "epoch": 4.32, "eval_loss": 1.0424212217330933, "eval_runtime": 220.4736, "eval_samples_per_second": 453.569, "eval_steps_per_second": 14.174, "step": 460500 }, { "epoch": 4.33, "learning_rate": 6.738799947448435e-06, "loss": 1.2059, "step": 461000 }, { "epoch": 4.33, "eval_loss": 1.0408573150634766, "eval_runtime": 220.4712, "eval_samples_per_second": 453.574, "eval_steps_per_second": 14.174, "step": 461000 }, { "epoch": 4.33, "learning_rate": 6.691878906176687e-06, "loss": 1.1939, "step": 461500 }, { "epoch": 4.33, "eval_loss": 1.0424449443817139, "eval_runtime": 220.5212, "eval_samples_per_second": 453.471, "eval_steps_per_second": 14.171, "step": 461500 }, { "epoch": 4.34, "learning_rate": 6.644957864904938e-06, "loss": 1.2027, "step": 462000 }, { "epoch": 4.34, "eval_loss": 1.04022216796875, "eval_runtime": 220.5052, "eval_samples_per_second": 453.504, "eval_steps_per_second": 14.172, "step": 462000 }, { "epoch": 4.34, "learning_rate": 6.59803682363319e-06, "loss": 1.1745, "step": 462500 }, { "epoch": 4.34, "eval_loss": 1.0380291938781738, "eval_runtime": 220.5027, "eval_samples_per_second": 453.509, "eval_steps_per_second": 14.172, "step": 462500 }, { "epoch": 4.34, "learning_rate": 6.551115782361443e-06, "loss": 1.2155, "step": 463000 }, { "epoch": 4.34, "eval_loss": 1.0391091108322144, "eval_runtime": 220.4856, "eval_samples_per_second": 453.544, "eval_steps_per_second": 14.173, "step": 463000 }, { "epoch": 4.35, "learning_rate": 6.504194741089695e-06, "loss": 1.2033, "step": 463500 }, { "epoch": 4.35, "eval_loss": 1.042190670967102, "eval_runtime": 220.4876, "eval_samples_per_second": 453.54, "eval_steps_per_second": 14.173, "step": 463500 }, { "epoch": 4.35, "learning_rate": 6.457273699817946e-06, "loss": 1.2064, "step": 464000 }, { "epoch": 4.35, "eval_loss": 1.040285348892212, "eval_runtime": 220.4809, "eval_samples_per_second": 453.554, "eval_steps_per_second": 14.174, "step": 464000 }, { "epoch": 4.36, "learning_rate": 6.410352658546198e-06, "loss": 1.1943, "step": 464500 }, { "epoch": 4.36, "eval_loss": 1.0418440103530884, "eval_runtime": 220.482, "eval_samples_per_second": 453.552, "eval_steps_per_second": 14.173, "step": 464500 }, { "epoch": 4.36, "learning_rate": 6.363431617274451e-06, "loss": 1.1875, "step": 465000 }, { "epoch": 4.36, "eval_loss": 1.0404266119003296, "eval_runtime": 220.484, "eval_samples_per_second": 453.548, "eval_steps_per_second": 14.173, "step": 465000 }, { "epoch": 4.37, "learning_rate": 6.316510576002703e-06, "loss": 1.1978, "step": 465500 }, { "epoch": 4.37, "eval_loss": 1.0388667583465576, "eval_runtime": 220.5052, "eval_samples_per_second": 453.504, "eval_steps_per_second": 14.172, "step": 465500 }, { "epoch": 4.37, "learning_rate": 6.269589534730954e-06, "loss": 1.1963, "step": 466000 }, { "epoch": 4.37, "eval_loss": 1.039662480354309, "eval_runtime": 220.4721, "eval_samples_per_second": 453.572, "eval_steps_per_second": 14.174, "step": 466000 }, { "epoch": 4.38, "learning_rate": 6.222668493459207e-06, "loss": 1.1991, "step": 466500 }, { "epoch": 4.38, "eval_loss": 1.0323457717895508, "eval_runtime": 220.4655, "eval_samples_per_second": 453.586, "eval_steps_per_second": 14.175, "step": 466500 }, { "epoch": 4.38, "learning_rate": 6.175747452187459e-06, "loss": 1.2045, "step": 467000 }, { "epoch": 4.38, "eval_loss": 1.0490866899490356, "eval_runtime": 220.4714, "eval_samples_per_second": 453.574, "eval_steps_per_second": 14.174, "step": 467000 }, { "epoch": 4.39, "learning_rate": 6.128826410915712e-06, "loss": 1.1782, "step": 467500 }, { "epoch": 4.39, "eval_loss": 1.0345863103866577, "eval_runtime": 220.4724, "eval_samples_per_second": 453.571, "eval_steps_per_second": 14.174, "step": 467500 }, { "epoch": 4.39, "learning_rate": 6.081905369643963e-06, "loss": 1.1832, "step": 468000 }, { "epoch": 4.39, "eval_loss": 1.0386844873428345, "eval_runtime": 220.5673, "eval_samples_per_second": 453.376, "eval_steps_per_second": 14.168, "step": 468000 }, { "epoch": 4.4, "learning_rate": 6.034984328372216e-06, "loss": 1.2035, "step": 468500 }, { "epoch": 4.4, "eval_loss": 1.0390254259109497, "eval_runtime": 220.603, "eval_samples_per_second": 453.303, "eval_steps_per_second": 14.166, "step": 468500 }, { "epoch": 4.4, "learning_rate": 5.988063287100468e-06, "loss": 1.2036, "step": 469000 }, { "epoch": 4.4, "eval_loss": 1.0352883338928223, "eval_runtime": 220.4977, "eval_samples_per_second": 453.52, "eval_steps_per_second": 14.172, "step": 469000 }, { "epoch": 4.41, "learning_rate": 5.94114224582872e-06, "loss": 1.2, "step": 469500 }, { "epoch": 4.41, "eval_loss": 1.03839111328125, "eval_runtime": 220.4659, "eval_samples_per_second": 453.585, "eval_steps_per_second": 14.175, "step": 469500 }, { "epoch": 4.41, "learning_rate": 5.894221204556972e-06, "loss": 1.1869, "step": 470000 }, { "epoch": 4.41, "eval_loss": 1.0423153638839722, "eval_runtime": 220.3637, "eval_samples_per_second": 453.795, "eval_steps_per_second": 14.181, "step": 470000 }, { "epoch": 4.42, "learning_rate": 5.8473001632852245e-06, "loss": 1.2012, "step": 470500 }, { "epoch": 4.42, "eval_loss": 1.0392744541168213, "eval_runtime": 220.3412, "eval_samples_per_second": 453.842, "eval_steps_per_second": 14.183, "step": 470500 }, { "epoch": 4.42, "learning_rate": 5.800379122013476e-06, "loss": 1.1912, "step": 471000 }, { "epoch": 4.42, "eval_loss": 1.0390135049819946, "eval_runtime": 220.3713, "eval_samples_per_second": 453.78, "eval_steps_per_second": 14.181, "step": 471000 }, { "epoch": 4.42, "learning_rate": 5.7534580807417286e-06, "loss": 1.2062, "step": 471500 }, { "epoch": 4.42, "eval_loss": 1.0349584817886353, "eval_runtime": 220.3752, "eval_samples_per_second": 453.771, "eval_steps_per_second": 14.18, "step": 471500 }, { "epoch": 4.43, "learning_rate": 5.70653703946998e-06, "loss": 1.2079, "step": 472000 }, { "epoch": 4.43, "eval_loss": 1.0324441194534302, "eval_runtime": 220.391, "eval_samples_per_second": 453.739, "eval_steps_per_second": 14.179, "step": 472000 }, { "epoch": 4.43, "learning_rate": 5.659615998198233e-06, "loss": 1.1967, "step": 472500 }, { "epoch": 4.43, "eval_loss": 1.0354297161102295, "eval_runtime": 220.4046, "eval_samples_per_second": 453.711, "eval_steps_per_second": 14.178, "step": 472500 }, { "epoch": 4.44, "learning_rate": 5.612694956926484e-06, "loss": 1.194, "step": 473000 }, { "epoch": 4.44, "eval_loss": 1.0334423780441284, "eval_runtime": 220.39, "eval_samples_per_second": 453.741, "eval_steps_per_second": 14.179, "step": 473000 }, { "epoch": 4.44, "learning_rate": 5.565773915654737e-06, "loss": 1.1962, "step": 473500 }, { "epoch": 4.44, "eval_loss": 1.0328459739685059, "eval_runtime": 220.3907, "eval_samples_per_second": 453.74, "eval_steps_per_second": 14.179, "step": 473500 }, { "epoch": 4.45, "learning_rate": 5.518852874382988e-06, "loss": 1.209, "step": 474000 }, { "epoch": 4.45, "eval_loss": 1.0326672792434692, "eval_runtime": 220.4416, "eval_samples_per_second": 453.635, "eval_steps_per_second": 14.176, "step": 474000 }, { "epoch": 4.45, "learning_rate": 5.471931833111241e-06, "loss": 1.1964, "step": 474500 }, { "epoch": 4.45, "eval_loss": 1.0362826585769653, "eval_runtime": 220.4411, "eval_samples_per_second": 453.636, "eval_steps_per_second": 14.176, "step": 474500 }, { "epoch": 4.46, "learning_rate": 5.425010791839493e-06, "loss": 1.1993, "step": 475000 }, { "epoch": 4.46, "eval_loss": 1.0309995412826538, "eval_runtime": 220.4439, "eval_samples_per_second": 453.63, "eval_steps_per_second": 14.176, "step": 475000 }, { "epoch": 4.46, "learning_rate": 5.378089750567745e-06, "loss": 1.2089, "step": 475500 }, { "epoch": 4.46, "eval_loss": 1.0346201658248901, "eval_runtime": 220.3965, "eval_samples_per_second": 453.728, "eval_steps_per_second": 14.179, "step": 475500 }, { "epoch": 4.47, "learning_rate": 5.331168709295997e-06, "loss": 1.2001, "step": 476000 }, { "epoch": 4.47, "eval_loss": 1.0281124114990234, "eval_runtime": 220.429, "eval_samples_per_second": 453.661, "eval_steps_per_second": 14.177, "step": 476000 }, { "epoch": 4.47, "learning_rate": 5.284247668024249e-06, "loss": 1.2331, "step": 476500 }, { "epoch": 4.47, "eval_loss": 1.0332673788070679, "eval_runtime": 220.4002, "eval_samples_per_second": 453.72, "eval_steps_per_second": 14.179, "step": 476500 }, { "epoch": 4.48, "learning_rate": 5.237326626752501e-06, "loss": 1.2059, "step": 477000 }, { "epoch": 4.48, "eval_loss": 1.033066987991333, "eval_runtime": 220.4364, "eval_samples_per_second": 453.646, "eval_steps_per_second": 14.176, "step": 477000 }, { "epoch": 4.48, "learning_rate": 5.190405585480753e-06, "loss": 1.1919, "step": 477500 }, { "epoch": 4.48, "eval_loss": 1.035326600074768, "eval_runtime": 220.4207, "eval_samples_per_second": 453.678, "eval_steps_per_second": 14.177, "step": 477500 }, { "epoch": 4.49, "learning_rate": 5.143484544209005e-06, "loss": 1.2028, "step": 478000 }, { "epoch": 4.49, "eval_loss": 1.0330538749694824, "eval_runtime": 220.3873, "eval_samples_per_second": 453.747, "eval_steps_per_second": 14.18, "step": 478000 }, { "epoch": 4.49, "learning_rate": 5.096563502937257e-06, "loss": 1.1815, "step": 478500 }, { "epoch": 4.49, "eval_loss": 1.0284647941589355, "eval_runtime": 220.3832, "eval_samples_per_second": 453.755, "eval_steps_per_second": 14.18, "step": 478500 }, { "epoch": 4.5, "learning_rate": 5.049642461665509e-06, "loss": 1.2108, "step": 479000 }, { "epoch": 4.5, "eval_loss": 1.0274449586868286, "eval_runtime": 220.5409, "eval_samples_per_second": 453.431, "eval_steps_per_second": 14.17, "step": 479000 }, { "epoch": 4.5, "learning_rate": 5.002721420393762e-06, "loss": 1.1973, "step": 479500 }, { "epoch": 4.5, "eval_loss": 1.030910849571228, "eval_runtime": 220.517, "eval_samples_per_second": 453.48, "eval_steps_per_second": 14.171, "step": 479500 }, { "epoch": 4.5, "learning_rate": 4.955800379122014e-06, "loss": 1.2209, "step": 480000 }, { "epoch": 4.5, "eval_loss": 1.0277793407440186, "eval_runtime": 220.48, "eval_samples_per_second": 453.556, "eval_steps_per_second": 14.174, "step": 480000 }, { "epoch": 4.51, "learning_rate": 4.908879337850266e-06, "loss": 1.1701, "step": 480500 }, { "epoch": 4.51, "eval_loss": 1.0242763757705688, "eval_runtime": 220.3969, "eval_samples_per_second": 453.727, "eval_steps_per_second": 14.179, "step": 480500 }, { "epoch": 4.51, "learning_rate": 4.861958296578518e-06, "loss": 1.1876, "step": 481000 }, { "epoch": 4.51, "eval_loss": 1.0288127660751343, "eval_runtime": 220.2841, "eval_samples_per_second": 453.959, "eval_steps_per_second": 14.186, "step": 481000 }, { "epoch": 4.52, "learning_rate": 4.8150372553067706e-06, "loss": 1.1949, "step": 481500 }, { "epoch": 4.52, "eval_loss": 1.0324766635894775, "eval_runtime": 220.2548, "eval_samples_per_second": 454.02, "eval_steps_per_second": 14.188, "step": 481500 }, { "epoch": 4.52, "learning_rate": 4.768116214035022e-06, "loss": 1.1933, "step": 482000 }, { "epoch": 4.52, "eval_loss": 1.0230944156646729, "eval_runtime": 220.3139, "eval_samples_per_second": 453.898, "eval_steps_per_second": 14.184, "step": 482000 }, { "epoch": 4.53, "learning_rate": 4.721195172763275e-06, "loss": 1.1811, "step": 482500 }, { "epoch": 4.53, "eval_loss": 1.029863953590393, "eval_runtime": 220.2726, "eval_samples_per_second": 453.983, "eval_steps_per_second": 14.187, "step": 482500 }, { "epoch": 4.53, "learning_rate": 4.674274131491526e-06, "loss": 1.2033, "step": 483000 }, { "epoch": 4.53, "eval_loss": 1.0319043397903442, "eval_runtime": 220.2655, "eval_samples_per_second": 453.998, "eval_steps_per_second": 14.187, "step": 483000 }, { "epoch": 4.54, "learning_rate": 4.627353090219779e-06, "loss": 1.1919, "step": 483500 }, { "epoch": 4.54, "eval_loss": 1.0290050506591797, "eval_runtime": 220.268, "eval_samples_per_second": 453.992, "eval_steps_per_second": 14.187, "step": 483500 }, { "epoch": 4.54, "learning_rate": 4.58043204894803e-06, "loss": 1.195, "step": 484000 }, { "epoch": 4.54, "eval_loss": 1.0286130905151367, "eval_runtime": 220.2866, "eval_samples_per_second": 453.954, "eval_steps_per_second": 14.186, "step": 484000 }, { "epoch": 4.55, "learning_rate": 4.533511007676283e-06, "loss": 1.1795, "step": 484500 }, { "epoch": 4.55, "eval_loss": 1.0310746431350708, "eval_runtime": 220.3028, "eval_samples_per_second": 453.921, "eval_steps_per_second": 14.185, "step": 484500 }, { "epoch": 4.55, "learning_rate": 4.486589966404535e-06, "loss": 1.1981, "step": 485000 }, { "epoch": 4.55, "eval_loss": 1.0278400182724, "eval_runtime": 220.2765, "eval_samples_per_second": 453.975, "eval_steps_per_second": 14.187, "step": 485000 }, { "epoch": 4.56, "learning_rate": 4.439668925132787e-06, "loss": 1.1935, "step": 485500 }, { "epoch": 4.56, "eval_loss": 1.0325404405593872, "eval_runtime": 220.258, "eval_samples_per_second": 454.013, "eval_steps_per_second": 14.188, "step": 485500 }, { "epoch": 4.56, "learning_rate": 4.392747883861039e-06, "loss": 1.1901, "step": 486000 }, { "epoch": 4.56, "eval_loss": 1.029636263847351, "eval_runtime": 220.2457, "eval_samples_per_second": 454.038, "eval_steps_per_second": 14.189, "step": 486000 }, { "epoch": 4.57, "learning_rate": 4.345826842589291e-06, "loss": 1.1998, "step": 486500 }, { "epoch": 4.57, "eval_loss": 1.0245474576950073, "eval_runtime": 220.2767, "eval_samples_per_second": 453.975, "eval_steps_per_second": 14.187, "step": 486500 }, { "epoch": 4.57, "learning_rate": 4.298905801317543e-06, "loss": 1.1818, "step": 487000 }, { "epoch": 4.57, "eval_loss": 1.0290453433990479, "eval_runtime": 220.2654, "eval_samples_per_second": 453.998, "eval_steps_per_second": 14.187, "step": 487000 }, { "epoch": 4.57, "learning_rate": 4.251984760045795e-06, "loss": 1.1713, "step": 487500 }, { "epoch": 4.57, "eval_loss": 1.028717279434204, "eval_runtime": 220.2563, "eval_samples_per_second": 454.016, "eval_steps_per_second": 14.188, "step": 487500 }, { "epoch": 4.58, "learning_rate": 4.205063718774047e-06, "loss": 1.1841, "step": 488000 }, { "epoch": 4.58, "eval_loss": 1.0243215560913086, "eval_runtime": 220.2412, "eval_samples_per_second": 454.048, "eval_steps_per_second": 14.189, "step": 488000 }, { "epoch": 4.58, "learning_rate": 4.158142677502299e-06, "loss": 1.184, "step": 488500 }, { "epoch": 4.58, "eval_loss": 1.0320249795913696, "eval_runtime": 220.2122, "eval_samples_per_second": 454.107, "eval_steps_per_second": 14.191, "step": 488500 }, { "epoch": 4.59, "learning_rate": 4.111221636230551e-06, "loss": 1.1945, "step": 489000 }, { "epoch": 4.59, "eval_loss": 1.0234476327896118, "eval_runtime": 220.2507, "eval_samples_per_second": 454.028, "eval_steps_per_second": 14.188, "step": 489000 }, { "epoch": 4.59, "learning_rate": 4.064300594958804e-06, "loss": 1.1826, "step": 489500 }, { "epoch": 4.59, "eval_loss": 1.027974009513855, "eval_runtime": 220.2473, "eval_samples_per_second": 454.035, "eval_steps_per_second": 14.189, "step": 489500 }, { "epoch": 4.6, "learning_rate": 4.017379553687055e-06, "loss": 1.188, "step": 490000 }, { "epoch": 4.6, "eval_loss": 1.0277783870697021, "eval_runtime": 220.2581, "eval_samples_per_second": 454.013, "eval_steps_per_second": 14.188, "step": 490000 }, { "epoch": 4.6, "learning_rate": 3.970458512415308e-06, "loss": 1.1939, "step": 490500 }, { "epoch": 4.6, "eval_loss": 1.0183277130126953, "eval_runtime": 220.2638, "eval_samples_per_second": 454.001, "eval_steps_per_second": 14.188, "step": 490500 }, { "epoch": 4.61, "learning_rate": 3.923537471143559e-06, "loss": 1.1657, "step": 491000 }, { "epoch": 4.61, "eval_loss": 1.029819369316101, "eval_runtime": 220.2579, "eval_samples_per_second": 454.013, "eval_steps_per_second": 14.188, "step": 491000 }, { "epoch": 4.61, "learning_rate": 3.876616429871812e-06, "loss": 1.1917, "step": 491500 }, { "epoch": 4.61, "eval_loss": 1.0290553569793701, "eval_runtime": 220.4728, "eval_samples_per_second": 453.571, "eval_steps_per_second": 14.174, "step": 491500 }, { "epoch": 4.62, "learning_rate": 3.829695388600064e-06, "loss": 1.1945, "step": 492000 }, { "epoch": 4.62, "eval_loss": 1.031397819519043, "eval_runtime": 220.4389, "eval_samples_per_second": 453.64, "eval_steps_per_second": 14.176, "step": 492000 }, { "epoch": 4.62, "learning_rate": 3.782774347328316e-06, "loss": 1.2071, "step": 492500 }, { "epoch": 4.62, "eval_loss": 1.027860164642334, "eval_runtime": 220.3723, "eval_samples_per_second": 453.778, "eval_steps_per_second": 14.181, "step": 492500 }, { "epoch": 4.63, "learning_rate": 3.7358533060565677e-06, "loss": 1.2013, "step": 493000 }, { "epoch": 4.63, "eval_loss": 1.020934820175171, "eval_runtime": 220.3406, "eval_samples_per_second": 453.843, "eval_steps_per_second": 14.183, "step": 493000 }, { "epoch": 4.63, "learning_rate": 3.68893226478482e-06, "loss": 1.1762, "step": 493500 }, { "epoch": 4.63, "eval_loss": 1.0232270956039429, "eval_runtime": 220.2361, "eval_samples_per_second": 454.058, "eval_steps_per_second": 14.189, "step": 493500 }, { "epoch": 4.64, "learning_rate": 3.6420112235130726e-06, "loss": 1.2014, "step": 494000 }, { "epoch": 4.64, "eval_loss": 1.0220469236373901, "eval_runtime": 220.2877, "eval_samples_per_second": 453.952, "eval_steps_per_second": 14.186, "step": 494000 }, { "epoch": 4.64, "learning_rate": 3.595090182241324e-06, "loss": 1.1817, "step": 494500 }, { "epoch": 4.64, "eval_loss": 1.024511456489563, "eval_runtime": 220.3417, "eval_samples_per_second": 453.841, "eval_steps_per_second": 14.183, "step": 494500 }, { "epoch": 4.65, "learning_rate": 3.5481691409695766e-06, "loss": 1.1694, "step": 495000 }, { "epoch": 4.65, "eval_loss": 1.0245423316955566, "eval_runtime": 220.3569, "eval_samples_per_second": 453.809, "eval_steps_per_second": 14.182, "step": 495000 }, { "epoch": 4.65, "learning_rate": 3.5012480996978287e-06, "loss": 1.1864, "step": 495500 }, { "epoch": 4.65, "eval_loss": 1.0235341787338257, "eval_runtime": 220.3891, "eval_samples_per_second": 453.743, "eval_steps_per_second": 14.179, "step": 495500 }, { "epoch": 4.65, "learning_rate": 3.454327058426081e-06, "loss": 1.2, "step": 496000 }, { "epoch": 4.65, "eval_loss": 1.0256927013397217, "eval_runtime": 220.4025, "eval_samples_per_second": 453.715, "eval_steps_per_second": 14.179, "step": 496000 }, { "epoch": 4.66, "learning_rate": 3.4074060171543327e-06, "loss": 1.1897, "step": 496500 }, { "epoch": 4.66, "eval_loss": 1.0257229804992676, "eval_runtime": 220.4604, "eval_samples_per_second": 453.596, "eval_steps_per_second": 14.175, "step": 496500 }, { "epoch": 4.66, "learning_rate": 3.360484975882585e-06, "loss": 1.1929, "step": 497000 }, { "epoch": 4.66, "eval_loss": 1.023755431175232, "eval_runtime": 220.4521, "eval_samples_per_second": 453.613, "eval_steps_per_second": 14.175, "step": 497000 }, { "epoch": 4.67, "learning_rate": 3.3135639346108367e-06, "loss": 1.1853, "step": 497500 }, { "epoch": 4.67, "eval_loss": 1.0197463035583496, "eval_runtime": 220.4946, "eval_samples_per_second": 453.526, "eval_steps_per_second": 14.173, "step": 497500 }, { "epoch": 4.67, "learning_rate": 3.266642893339089e-06, "loss": 1.1909, "step": 498000 }, { "epoch": 4.67, "eval_loss": 1.0215179920196533, "eval_runtime": 220.5236, "eval_samples_per_second": 453.466, "eval_steps_per_second": 14.171, "step": 498000 }, { "epoch": 4.68, "learning_rate": 3.2197218520673416e-06, "loss": 1.1784, "step": 498500 }, { "epoch": 4.68, "eval_loss": 1.0229579210281372, "eval_runtime": 220.8462, "eval_samples_per_second": 452.804, "eval_steps_per_second": 14.15, "step": 498500 }, { "epoch": 4.68, "learning_rate": 3.172800810795593e-06, "loss": 1.1955, "step": 499000 }, { "epoch": 4.68, "eval_loss": 1.019065499305725, "eval_runtime": 220.55, "eval_samples_per_second": 453.412, "eval_steps_per_second": 14.169, "step": 499000 }, { "epoch": 4.69, "learning_rate": 3.1258797695238456e-06, "loss": 1.177, "step": 499500 }, { "epoch": 4.69, "eval_loss": 1.021791696548462, "eval_runtime": 220.5844, "eval_samples_per_second": 453.341, "eval_steps_per_second": 14.167, "step": 499500 }, { "epoch": 4.69, "learning_rate": 3.078958728252097e-06, "loss": 1.1883, "step": 500000 }, { "epoch": 4.69, "eval_loss": 1.0202616453170776, "eval_runtime": 220.5578, "eval_samples_per_second": 453.396, "eval_steps_per_second": 14.169, "step": 500000 }, { "epoch": 4.7, "learning_rate": 3.0320376869803496e-06, "loss": 1.1911, "step": 500500 }, { "epoch": 4.7, "eval_loss": 1.0237817764282227, "eval_runtime": 220.6247, "eval_samples_per_second": 453.258, "eval_steps_per_second": 14.164, "step": 500500 }, { "epoch": 4.7, "learning_rate": 2.9851166457086017e-06, "loss": 1.1923, "step": 501000 }, { "epoch": 4.7, "eval_loss": 1.0245826244354248, "eval_runtime": 220.6502, "eval_samples_per_second": 453.206, "eval_steps_per_second": 14.163, "step": 501000 }, { "epoch": 4.71, "learning_rate": 2.9381956044368537e-06, "loss": 1.1754, "step": 501500 }, { "epoch": 4.71, "eval_loss": 1.0203845500946045, "eval_runtime": 220.7296, "eval_samples_per_second": 453.043, "eval_steps_per_second": 14.158, "step": 501500 }, { "epoch": 4.71, "learning_rate": 2.891274563165106e-06, "loss": 1.1848, "step": 502000 }, { "epoch": 4.71, "eval_loss": 1.0270147323608398, "eval_runtime": 221.2106, "eval_samples_per_second": 452.058, "eval_steps_per_second": 14.127, "step": 502000 }, { "epoch": 4.72, "learning_rate": 2.844353521893358e-06, "loss": 1.1739, "step": 502500 }, { "epoch": 4.72, "eval_loss": 1.0190746784210205, "eval_runtime": 221.5269, "eval_samples_per_second": 451.412, "eval_steps_per_second": 14.107, "step": 502500 }, { "epoch": 4.72, "learning_rate": 2.79743248062161e-06, "loss": 1.1925, "step": 503000 }, { "epoch": 4.72, "eval_loss": 1.0178605318069458, "eval_runtime": 221.4578, "eval_samples_per_second": 451.553, "eval_steps_per_second": 14.111, "step": 503000 }, { "epoch": 4.72, "learning_rate": 2.750511439349862e-06, "loss": 1.1799, "step": 503500 }, { "epoch": 4.72, "eval_loss": 1.0173200368881226, "eval_runtime": 221.4615, "eval_samples_per_second": 451.546, "eval_steps_per_second": 14.111, "step": 503500 }, { "epoch": 4.73, "learning_rate": 2.703590398078114e-06, "loss": 1.1773, "step": 504000 }, { "epoch": 4.73, "eval_loss": 1.0217922925949097, "eval_runtime": 222.476, "eval_samples_per_second": 449.487, "eval_steps_per_second": 14.046, "step": 504000 }, { "epoch": 4.73, "learning_rate": 2.656669356806366e-06, "loss": 1.1675, "step": 504500 }, { "epoch": 4.73, "eval_loss": 1.0150542259216309, "eval_runtime": 242.0486, "eval_samples_per_second": 413.14, "eval_steps_per_second": 12.911, "step": 504500 }, { "epoch": 4.74, "learning_rate": 2.6097483155346186e-06, "loss": 1.1813, "step": 505000 }, { "epoch": 4.74, "eval_loss": 1.0178701877593994, "eval_runtime": 242.0285, "eval_samples_per_second": 413.175, "eval_steps_per_second": 12.912, "step": 505000 }, { "epoch": 4.74, "learning_rate": 2.5628272742628706e-06, "loss": 1.1726, "step": 505500 }, { "epoch": 4.74, "eval_loss": 1.0183017253875732, "eval_runtime": 242.2866, "eval_samples_per_second": 412.734, "eval_steps_per_second": 12.898, "step": 505500 }, { "epoch": 4.75, "learning_rate": 2.5159062329911226e-06, "loss": 1.1944, "step": 506000 }, { "epoch": 4.75, "eval_loss": 1.011798620223999, "eval_runtime": 242.0251, "eval_samples_per_second": 413.18, "eval_steps_per_second": 12.912, "step": 506000 }, { "epoch": 4.75, "learning_rate": 2.4689851917193747e-06, "loss": 1.1972, "step": 506500 }, { "epoch": 4.75, "eval_loss": 1.021371841430664, "eval_runtime": 242.3781, "eval_samples_per_second": 412.579, "eval_steps_per_second": 12.893, "step": 506500 }, { "epoch": 4.76, "learning_rate": 2.4220641504476267e-06, "loss": 1.1835, "step": 507000 }, { "epoch": 4.76, "eval_loss": 1.024190902709961, "eval_runtime": 243.2084, "eval_samples_per_second": 411.17, "eval_steps_per_second": 12.849, "step": 507000 }, { "epoch": 4.76, "learning_rate": 2.375143109175879e-06, "loss": 1.1807, "step": 507500 }, { "epoch": 4.76, "eval_loss": 1.0209712982177734, "eval_runtime": 247.1697, "eval_samples_per_second": 404.58, "eval_steps_per_second": 12.643, "step": 507500 }, { "epoch": 4.77, "learning_rate": 2.328222067904131e-06, "loss": 1.1801, "step": 508000 }, { "epoch": 4.77, "eval_loss": 1.0197240114212036, "eval_runtime": 245.9329, "eval_samples_per_second": 406.615, "eval_steps_per_second": 12.707, "step": 508000 }, { "epoch": 4.77, "learning_rate": 2.281301026632383e-06, "loss": 1.1707, "step": 508500 }, { "epoch": 4.77, "eval_loss": 1.017072319984436, "eval_runtime": 246.0676, "eval_samples_per_second": 406.392, "eval_steps_per_second": 12.7, "step": 508500 }, { "epoch": 4.78, "learning_rate": 2.234379985360635e-06, "loss": 1.1831, "step": 509000 }, { "epoch": 4.78, "eval_loss": 1.0189136266708374, "eval_runtime": 241.7862, "eval_samples_per_second": 413.589, "eval_steps_per_second": 12.925, "step": 509000 }, { "epoch": 4.78, "learning_rate": 2.1874589440888876e-06, "loss": 1.182, "step": 509500 }, { "epoch": 4.78, "eval_loss": 1.0172502994537354, "eval_runtime": 233.7568, "eval_samples_per_second": 427.795, "eval_steps_per_second": 13.369, "step": 509500 }, { "epoch": 4.79, "learning_rate": 2.1405379028171396e-06, "loss": 1.1946, "step": 510000 }, { "epoch": 4.79, "eval_loss": 1.0199698209762573, "eval_runtime": 233.9329, "eval_samples_per_second": 427.473, "eval_steps_per_second": 13.359, "step": 510000 }, { "epoch": 4.79, "learning_rate": 2.0936168615453916e-06, "loss": 1.1738, "step": 510500 }, { "epoch": 4.79, "eval_loss": 1.016062617301941, "eval_runtime": 235.9837, "eval_samples_per_second": 423.758, "eval_steps_per_second": 13.242, "step": 510500 }, { "epoch": 4.8, "learning_rate": 2.0466958202736436e-06, "loss": 1.1735, "step": 511000 }, { "epoch": 4.8, "eval_loss": 1.0233088731765747, "eval_runtime": 236.8841, "eval_samples_per_second": 422.147, "eval_steps_per_second": 13.192, "step": 511000 }, { "epoch": 4.8, "learning_rate": 1.9997747790018957e-06, "loss": 1.1771, "step": 511500 }, { "epoch": 4.8, "eval_loss": 1.0144411325454712, "eval_runtime": 236.4685, "eval_samples_per_second": 422.889, "eval_steps_per_second": 13.215, "step": 511500 }, { "epoch": 4.8, "learning_rate": 1.9528537377301477e-06, "loss": 1.165, "step": 512000 }, { "epoch": 4.8, "eval_loss": 1.0135992765426636, "eval_runtime": 233.2323, "eval_samples_per_second": 428.757, "eval_steps_per_second": 13.399, "step": 512000 }, { "epoch": 4.81, "learning_rate": 1.9059326964583999e-06, "loss": 1.1783, "step": 512500 }, { "epoch": 4.81, "eval_loss": 1.0253207683563232, "eval_runtime": 231.2367, "eval_samples_per_second": 432.457, "eval_steps_per_second": 13.514, "step": 512500 }, { "epoch": 4.81, "learning_rate": 1.859011655186652e-06, "loss": 1.1783, "step": 513000 }, { "epoch": 4.81, "eval_loss": 1.0126628875732422, "eval_runtime": 233.9645, "eval_samples_per_second": 427.415, "eval_steps_per_second": 13.357, "step": 513000 }, { "epoch": 4.82, "learning_rate": 1.812090613914904e-06, "loss": 1.1824, "step": 513500 }, { "epoch": 4.82, "eval_loss": 1.021931767463684, "eval_runtime": 232.7488, "eval_samples_per_second": 429.648, "eval_steps_per_second": 13.426, "step": 513500 }, { "epoch": 4.82, "learning_rate": 1.765169572643156e-06, "loss": 1.1553, "step": 514000 }, { "epoch": 4.82, "eval_loss": 1.015001654624939, "eval_runtime": 232.0708, "eval_samples_per_second": 430.903, "eval_steps_per_second": 13.466, "step": 514000 }, { "epoch": 4.83, "learning_rate": 1.7182485313714084e-06, "loss": 1.1799, "step": 514500 }, { "epoch": 4.83, "eval_loss": 1.0170458555221558, "eval_runtime": 231.7543, "eval_samples_per_second": 431.491, "eval_steps_per_second": 13.484, "step": 514500 }, { "epoch": 4.83, "learning_rate": 1.6713274900996604e-06, "loss": 1.1958, "step": 515000 }, { "epoch": 4.83, "eval_loss": 1.0164183378219604, "eval_runtime": 230.7078, "eval_samples_per_second": 433.449, "eval_steps_per_second": 13.545, "step": 515000 }, { "epoch": 4.84, "learning_rate": 1.6244064488279126e-06, "loss": 1.1946, "step": 515500 }, { "epoch": 4.84, "eval_loss": 1.016738772392273, "eval_runtime": 231.8187, "eval_samples_per_second": 431.372, "eval_steps_per_second": 13.48, "step": 515500 }, { "epoch": 4.84, "learning_rate": 1.5774854075561646e-06, "loss": 1.1803, "step": 516000 }, { "epoch": 4.84, "eval_loss": 1.0190505981445312, "eval_runtime": 229.9801, "eval_samples_per_second": 434.82, "eval_steps_per_second": 13.588, "step": 516000 }, { "epoch": 4.85, "learning_rate": 1.5305643662844166e-06, "loss": 1.1723, "step": 516500 }, { "epoch": 4.85, "eval_loss": 1.017173409461975, "eval_runtime": 230.1856, "eval_samples_per_second": 434.432, "eval_steps_per_second": 13.576, "step": 516500 }, { "epoch": 4.85, "learning_rate": 1.4836433250126687e-06, "loss": 1.1627, "step": 517000 }, { "epoch": 4.85, "eval_loss": 1.013454556465149, "eval_runtime": 229.3387, "eval_samples_per_second": 436.036, "eval_steps_per_second": 13.626, "step": 517000 }, { "epoch": 4.86, "learning_rate": 1.4367222837409209e-06, "loss": 1.1677, "step": 517500 }, { "epoch": 4.86, "eval_loss": 1.0167146921157837, "eval_runtime": 229.9528, "eval_samples_per_second": 434.872, "eval_steps_per_second": 13.59, "step": 517500 }, { "epoch": 4.86, "learning_rate": 1.389801242469173e-06, "loss": 1.1923, "step": 518000 }, { "epoch": 4.86, "eval_loss": 1.0137449502944946, "eval_runtime": 233.5878, "eval_samples_per_second": 428.105, "eval_steps_per_second": 13.378, "step": 518000 }, { "epoch": 4.87, "learning_rate": 1.3428802011974251e-06, "loss": 1.1911, "step": 518500 }, { "epoch": 4.87, "eval_loss": 1.0253547430038452, "eval_runtime": 227.612, "eval_samples_per_second": 439.344, "eval_steps_per_second": 13.73, "step": 518500 }, { "epoch": 4.87, "learning_rate": 1.2959591599256771e-06, "loss": 1.1708, "step": 519000 }, { "epoch": 4.87, "eval_loss": 1.018088936805725, "eval_runtime": 229.1156, "eval_samples_per_second": 436.461, "eval_steps_per_second": 13.639, "step": 519000 }, { "epoch": 4.88, "learning_rate": 1.2490381186539292e-06, "loss": 1.1737, "step": 519500 }, { "epoch": 4.88, "eval_loss": 1.0122038125991821, "eval_runtime": 234.5523, "eval_samples_per_second": 426.344, "eval_steps_per_second": 13.323, "step": 519500 }, { "epoch": 4.88, "learning_rate": 1.2021170773821814e-06, "loss": 1.1872, "step": 520000 }, { "epoch": 4.88, "eval_loss": 1.0127085447311401, "eval_runtime": 229.3109, "eval_samples_per_second": 436.089, "eval_steps_per_second": 13.628, "step": 520000 }, { "epoch": 4.88, "learning_rate": 1.1551960361104334e-06, "loss": 1.1734, "step": 520500 }, { "epoch": 4.88, "eval_loss": 1.0173479318618774, "eval_runtime": 228.8989, "eval_samples_per_second": 436.874, "eval_steps_per_second": 13.652, "step": 520500 }, { "epoch": 4.89, "learning_rate": 1.1082749948386854e-06, "loss": 1.179, "step": 521000 }, { "epoch": 4.89, "eval_loss": 1.0175164937973022, "eval_runtime": 223.9589, "eval_samples_per_second": 446.51, "eval_steps_per_second": 13.953, "step": 521000 }, { "epoch": 4.89, "learning_rate": 1.0613539535669376e-06, "loss": 1.1778, "step": 521500 }, { "epoch": 4.89, "eval_loss": 1.0166733264923096, "eval_runtime": 229.4526, "eval_samples_per_second": 435.82, "eval_steps_per_second": 13.619, "step": 521500 }, { "epoch": 4.9, "learning_rate": 1.0144329122951896e-06, "loss": 1.1732, "step": 522000 }, { "epoch": 4.9, "eval_loss": 1.0115753412246704, "eval_runtime": 229.4827, "eval_samples_per_second": 435.763, "eval_steps_per_second": 13.618, "step": 522000 }, { "epoch": 4.9, "learning_rate": 9.675118710234419e-07, "loss": 1.1635, "step": 522500 }, { "epoch": 4.9, "eval_loss": 1.0163120031356812, "eval_runtime": 227.8263, "eval_samples_per_second": 438.931, "eval_steps_per_second": 13.717, "step": 522500 }, { "epoch": 4.91, "learning_rate": 9.205908297516939e-07, "loss": 1.1677, "step": 523000 }, { "epoch": 4.91, "eval_loss": 1.0148788690567017, "eval_runtime": 222.1269, "eval_samples_per_second": 450.193, "eval_steps_per_second": 14.069, "step": 523000 }, { "epoch": 4.91, "learning_rate": 8.73669788479946e-07, "loss": 1.1967, "step": 523500 }, { "epoch": 4.91, "eval_loss": 1.010750412940979, "eval_runtime": 221.9772, "eval_samples_per_second": 450.497, "eval_steps_per_second": 14.078, "step": 523500 }, { "epoch": 4.92, "learning_rate": 8.26748747208198e-07, "loss": 1.1772, "step": 524000 }, { "epoch": 4.92, "eval_loss": 1.0138213634490967, "eval_runtime": 221.8206, "eval_samples_per_second": 450.815, "eval_steps_per_second": 14.088, "step": 524000 }, { "epoch": 4.92, "learning_rate": 7.798277059364501e-07, "loss": 1.1817, "step": 524500 }, { "epoch": 4.92, "eval_loss": 1.0085700750350952, "eval_runtime": 221.8601, "eval_samples_per_second": 450.735, "eval_steps_per_second": 14.085, "step": 524500 }, { "epoch": 4.93, "learning_rate": 7.329066646647023e-07, "loss": 1.1755, "step": 525000 }, { "epoch": 4.93, "eval_loss": 1.0190365314483643, "eval_runtime": 221.8179, "eval_samples_per_second": 450.82, "eval_steps_per_second": 14.088, "step": 525000 }, { "epoch": 4.93, "learning_rate": 6.859856233929544e-07, "loss": 1.1866, "step": 525500 }, { "epoch": 4.93, "eval_loss": 1.0140148401260376, "eval_runtime": 221.8188, "eval_samples_per_second": 450.818, "eval_steps_per_second": 14.088, "step": 525500 }, { "epoch": 4.94, "learning_rate": 6.390645821212064e-07, "loss": 1.1818, "step": 526000 }, { "epoch": 4.94, "eval_loss": 1.0161159038543701, "eval_runtime": 221.8238, "eval_samples_per_second": 450.808, "eval_steps_per_second": 14.088, "step": 526000 }, { "epoch": 4.94, "learning_rate": 5.921435408494586e-07, "loss": 1.1818, "step": 526500 }, { "epoch": 4.94, "eval_loss": 1.0107953548431396, "eval_runtime": 221.8183, "eval_samples_per_second": 450.819, "eval_steps_per_second": 14.088, "step": 526500 }, { "epoch": 4.95, "learning_rate": 5.452224995777106e-07, "loss": 1.1613, "step": 527000 }, { "epoch": 4.95, "eval_loss": 1.0149364471435547, "eval_runtime": 221.7834, "eval_samples_per_second": 450.89, "eval_steps_per_second": 14.09, "step": 527000 }, { "epoch": 4.95, "learning_rate": 4.983014583059628e-07, "loss": 1.1645, "step": 527500 }, { "epoch": 4.95, "eval_loss": 1.0124648809432983, "eval_runtime": 221.8169, "eval_samples_per_second": 450.822, "eval_steps_per_second": 14.088, "step": 527500 }, { "epoch": 4.95, "learning_rate": 4.513804170342148e-07, "loss": 1.193, "step": 528000 }, { "epoch": 4.95, "eval_loss": 1.009380578994751, "eval_runtime": 221.7526, "eval_samples_per_second": 450.953, "eval_steps_per_second": 14.092, "step": 528000 }, { "epoch": 4.96, "learning_rate": 4.04459375762467e-07, "loss": 1.1537, "step": 528500 }, { "epoch": 4.96, "eval_loss": 1.0124224424362183, "eval_runtime": 221.8595, "eval_samples_per_second": 450.736, "eval_steps_per_second": 14.085, "step": 528500 }, { "epoch": 4.96, "learning_rate": 3.57538334490719e-07, "loss": 1.1691, "step": 529000 }, { "epoch": 4.96, "eval_loss": 1.0114690065383911, "eval_runtime": 222.0421, "eval_samples_per_second": 450.365, "eval_steps_per_second": 14.074, "step": 529000 }, { "epoch": 4.97, "learning_rate": 3.1061729321897113e-07, "loss": 1.1643, "step": 529500 }, { "epoch": 4.97, "eval_loss": 1.0129092931747437, "eval_runtime": 227.2564, "eval_samples_per_second": 440.032, "eval_steps_per_second": 13.751, "step": 529500 }, { "epoch": 4.97, "learning_rate": 2.6369625194722325e-07, "loss": 1.1592, "step": 530000 }, { "epoch": 4.97, "eval_loss": 1.0124961137771606, "eval_runtime": 228.0531, "eval_samples_per_second": 438.494, "eval_steps_per_second": 13.703, "step": 530000 } ], "max_steps": 532810, "num_train_epochs": 5, "total_flos": 5.5361381292640083e+17, "trial_name": null, "trial_params": null }