{ "best_metric": 0.7345585823059082, "best_model_checkpoint": "/home/khalid/Documents/github_rep/MyProjects/CodeBase/training/output/checkpoint-1000000", "epoch": 1.0723469406083477, "global_step": 1000000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3e-05, "loss": 1.7484, "step": 500 }, { "epoch": 0.0, "learning_rate": 2.9969969969969972e-05, "loss": 1.6173, "step": 1000 }, { "epoch": 0.0, "learning_rate": 2.9939939939939944e-05, "loss": 1.5574, "step": 1500 }, { "epoch": 0.0, "learning_rate": 2.9909909909909908e-05, "loss": 1.5441, "step": 2000 }, { "epoch": 0.0, "learning_rate": 2.987987987987988e-05, "loss": 1.5047, "step": 2500 }, { "epoch": 0.0, "learning_rate": 2.984984984984985e-05, "loss": 1.4768, "step": 3000 }, { "epoch": 0.0, "learning_rate": 2.9819819819819822e-05, "loss": 1.4624, "step": 3500 }, { "epoch": 0.0, "learning_rate": 2.978978978978979e-05, "loss": 1.4313, "step": 4000 }, { "epoch": 0.0, "learning_rate": 2.975975975975976e-05, "loss": 1.4138, "step": 4500 }, { "epoch": 0.01, "learning_rate": 2.972972972972973e-05, "loss": 1.4181, "step": 5000 }, { "epoch": 0.01, "learning_rate": 2.96996996996997e-05, "loss": 1.3907, "step": 5500 }, { "epoch": 0.01, "learning_rate": 2.9669669669669673e-05, "loss": 1.377, "step": 6000 }, { "epoch": 0.01, "learning_rate": 2.9639699699699702e-05, "loss": 1.383, "step": 6500 }, { "epoch": 0.01, "learning_rate": 2.960972972972973e-05, "loss": 1.3738, "step": 7000 }, { "epoch": 0.01, "learning_rate": 2.95796996996997e-05, "loss": 1.3614, "step": 7500 }, { "epoch": 0.01, "learning_rate": 2.954966966966967e-05, "loss": 1.344, "step": 8000 }, { "epoch": 0.01, "learning_rate": 2.9519639639639642e-05, "loss": 1.3477, "step": 8500 }, { "epoch": 0.01, "learning_rate": 2.948960960960961e-05, "loss": 1.3436, "step": 9000 }, { "epoch": 0.01, "learning_rate": 2.9459639639639642e-05, "loss": 1.334, "step": 9500 }, { "epoch": 0.01, "learning_rate": 2.942966966966967e-05, "loss": 1.3165, "step": 10000 }, { "epoch": 0.01, "eval_loss": 1.2129223346710205, "eval_runtime": 577.4417, "eval_samples_per_second": 173.178, "eval_steps_per_second": 43.294, "step": 10000 }, { "epoch": 0.01, "learning_rate": 2.9399639639639643e-05, "loss": 1.3218, "step": 10500 }, { "epoch": 0.01, "learning_rate": 2.9369609609609608e-05, "loss": 1.3243, "step": 11000 }, { "epoch": 0.01, "learning_rate": 2.933957957957958e-05, "loss": 1.3016, "step": 11500 }, { "epoch": 0.01, "learning_rate": 2.930954954954955e-05, "loss": 1.3057, "step": 12000 }, { "epoch": 0.01, "learning_rate": 2.927951951951952e-05, "loss": 1.2973, "step": 12500 }, { "epoch": 0.01, "learning_rate": 2.9249489489489493e-05, "loss": 1.2911, "step": 13000 }, { "epoch": 0.01, "learning_rate": 2.9219459459459458e-05, "loss": 1.294, "step": 13500 }, { "epoch": 0.02, "learning_rate": 2.918942942942943e-05, "loss": 1.291, "step": 14000 }, { "epoch": 0.02, "learning_rate": 2.91593993993994e-05, "loss": 1.2678, "step": 14500 }, { "epoch": 0.02, "learning_rate": 2.9129369369369372e-05, "loss": 1.27, "step": 15000 }, { "epoch": 0.02, "learning_rate": 2.909933933933934e-05, "loss": 1.2418, "step": 15500 }, { "epoch": 0.02, "learning_rate": 2.906936936936937e-05, "loss": 1.2373, "step": 16000 }, { "epoch": 0.02, "learning_rate": 2.903933933933934e-05, "loss": 1.2567, "step": 16500 }, { "epoch": 0.02, "learning_rate": 2.900930930930931e-05, "loss": 1.2449, "step": 17000 }, { "epoch": 0.02, "learning_rate": 2.897927927927928e-05, "loss": 1.2554, "step": 17500 }, { "epoch": 0.02, "learning_rate": 2.894924924924925e-05, "loss": 1.2579, "step": 18000 }, { "epoch": 0.02, "learning_rate": 2.891927927927928e-05, "loss": 1.2351, "step": 18500 }, { "epoch": 0.02, "learning_rate": 2.888924924924925e-05, "loss": 1.2227, "step": 19000 }, { "epoch": 0.02, "learning_rate": 2.885921921921922e-05, "loss": 1.2436, "step": 19500 }, { "epoch": 0.02, "learning_rate": 2.882924924924925e-05, "loss": 1.2256, "step": 20000 }, { "epoch": 0.02, "eval_loss": 1.1297707557678223, "eval_runtime": 613.186, "eval_samples_per_second": 163.083, "eval_steps_per_second": 40.771, "step": 20000 }, { "epoch": 0.02, "learning_rate": 2.879921921921922e-05, "loss": 1.2092, "step": 20500 }, { "epoch": 0.02, "learning_rate": 2.8769189189189192e-05, "loss": 1.2248, "step": 21000 }, { "epoch": 0.02, "learning_rate": 2.873915915915916e-05, "loss": 1.2378, "step": 21500 }, { "epoch": 0.02, "learning_rate": 2.8709129129129128e-05, "loss": 1.2182, "step": 22000 }, { "epoch": 0.02, "learning_rate": 2.8679159159159157e-05, "loss": 1.2111, "step": 22500 }, { "epoch": 0.02, "learning_rate": 2.864912912912913e-05, "loss": 1.2088, "step": 23000 }, { "epoch": 0.03, "learning_rate": 2.86190990990991e-05, "loss": 1.2104, "step": 23500 }, { "epoch": 0.03, "learning_rate": 2.858906906906907e-05, "loss": 1.1984, "step": 24000 }, { "epoch": 0.03, "learning_rate": 2.8559039039039043e-05, "loss": 1.206, "step": 24500 }, { "epoch": 0.03, "learning_rate": 2.8529009009009007e-05, "loss": 1.1827, "step": 25000 }, { "epoch": 0.03, "learning_rate": 2.849897897897898e-05, "loss": 1.1953, "step": 25500 }, { "epoch": 0.03, "learning_rate": 2.8469009009009008e-05, "loss": 1.1964, "step": 26000 }, { "epoch": 0.03, "learning_rate": 2.843903903903904e-05, "loss": 1.2048, "step": 26500 }, { "epoch": 0.03, "learning_rate": 2.840900900900901e-05, "loss": 1.1847, "step": 27000 }, { "epoch": 0.03, "learning_rate": 2.837897897897898e-05, "loss": 1.1808, "step": 27500 }, { "epoch": 0.03, "learning_rate": 2.834894894894895e-05, "loss": 1.1685, "step": 28000 }, { "epoch": 0.03, "learning_rate": 2.831891891891892e-05, "loss": 1.1838, "step": 28500 }, { "epoch": 0.03, "learning_rate": 2.828888888888889e-05, "loss": 1.1876, "step": 29000 }, { "epoch": 0.03, "learning_rate": 2.825885885885886e-05, "loss": 1.1851, "step": 29500 }, { "epoch": 0.03, "learning_rate": 2.822882882882883e-05, "loss": 1.1683, "step": 30000 }, { "epoch": 0.03, "eval_loss": 1.090136170387268, "eval_runtime": 615.6857, "eval_samples_per_second": 162.421, "eval_steps_per_second": 40.605, "step": 30000 }, { "epoch": 0.03, "learning_rate": 2.8198798798798798e-05, "loss": 1.1725, "step": 30500 }, { "epoch": 0.03, "learning_rate": 2.816876876876877e-05, "loss": 1.1879, "step": 31000 }, { "epoch": 0.03, "learning_rate": 2.813873873873874e-05, "loss": 1.1635, "step": 31500 }, { "epoch": 0.03, "learning_rate": 2.810870870870871e-05, "loss": 1.1892, "step": 32000 }, { "epoch": 0.03, "learning_rate": 2.807873873873874e-05, "loss": 1.1675, "step": 32500 }, { "epoch": 0.04, "learning_rate": 2.804870870870871e-05, "loss": 1.1766, "step": 33000 }, { "epoch": 0.04, "learning_rate": 2.8018678678678677e-05, "loss": 1.1699, "step": 33500 }, { "epoch": 0.04, "learning_rate": 2.798864864864865e-05, "loss": 1.1573, "step": 34000 }, { "epoch": 0.04, "learning_rate": 2.7958678678678678e-05, "loss": 1.1695, "step": 34500 }, { "epoch": 0.04, "learning_rate": 2.792864864864865e-05, "loss": 1.1419, "step": 35000 }, { "epoch": 0.04, "learning_rate": 2.789861861861862e-05, "loss": 1.1523, "step": 35500 }, { "epoch": 0.04, "learning_rate": 2.7868588588588592e-05, "loss": 1.1544, "step": 36000 }, { "epoch": 0.04, "learning_rate": 2.783861861861862e-05, "loss": 1.1501, "step": 36500 }, { "epoch": 0.04, "learning_rate": 2.780864864864865e-05, "loss": 1.1583, "step": 37000 }, { "epoch": 0.04, "learning_rate": 2.777861861861862e-05, "loss": 1.1455, "step": 37500 }, { "epoch": 0.04, "learning_rate": 2.774858858858859e-05, "loss": 1.1276, "step": 38000 }, { "epoch": 0.04, "learning_rate": 2.7718558558558558e-05, "loss": 1.139, "step": 38500 }, { "epoch": 0.04, "learning_rate": 2.768852852852853e-05, "loss": 1.1363, "step": 39000 }, { "epoch": 0.04, "learning_rate": 2.7658498498498497e-05, "loss": 1.1287, "step": 39500 }, { "epoch": 0.04, "learning_rate": 2.762846846846847e-05, "loss": 1.1429, "step": 40000 }, { "epoch": 0.04, "eval_loss": 1.0513352155685425, "eval_runtime": 613.186, "eval_samples_per_second": 163.083, "eval_steps_per_second": 40.771, "step": 40000 }, { "epoch": 0.04, "learning_rate": 2.759843843843844e-05, "loss": 1.1433, "step": 40500 }, { "epoch": 0.04, "learning_rate": 2.756846846846847e-05, "loss": 1.1378, "step": 41000 }, { "epoch": 0.04, "learning_rate": 2.753843843843844e-05, "loss": 1.1398, "step": 41500 }, { "epoch": 0.05, "learning_rate": 2.750846846846847e-05, "loss": 1.1185, "step": 42000 }, { "epoch": 0.05, "learning_rate": 2.74784984984985e-05, "loss": 1.1246, "step": 42500 }, { "epoch": 0.05, "learning_rate": 2.744846846846847e-05, "loss": 1.128, "step": 43000 }, { "epoch": 0.05, "learning_rate": 2.741843843843844e-05, "loss": 1.1205, "step": 43500 }, { "epoch": 0.05, "learning_rate": 2.738840840840841e-05, "loss": 1.1289, "step": 44000 }, { "epoch": 0.05, "learning_rate": 2.7358378378378378e-05, "loss": 1.1206, "step": 44500 }, { "epoch": 0.05, "learning_rate": 2.732834834834835e-05, "loss": 1.1348, "step": 45000 }, { "epoch": 0.05, "learning_rate": 2.729831831831832e-05, "loss": 1.1181, "step": 45500 }, { "epoch": 0.05, "learning_rate": 2.726828828828829e-05, "loss": 1.1186, "step": 46000 }, { "epoch": 0.05, "learning_rate": 2.7238258258258257e-05, "loss": 1.1241, "step": 46500 }, { "epoch": 0.05, "learning_rate": 2.7208228228228228e-05, "loss": 1.1175, "step": 47000 }, { "epoch": 0.05, "learning_rate": 2.717825825825826e-05, "loss": 1.1179, "step": 47500 }, { "epoch": 0.05, "learning_rate": 2.714822822822823e-05, "loss": 1.1185, "step": 48000 }, { "epoch": 0.05, "learning_rate": 2.71181981981982e-05, "loss": 1.1051, "step": 48500 }, { "epoch": 0.05, "learning_rate": 2.7088168168168168e-05, "loss": 1.1127, "step": 49000 }, { "epoch": 0.05, "learning_rate": 2.705813813813814e-05, "loss": 1.1079, "step": 49500 }, { "epoch": 0.05, "learning_rate": 2.7028108108108107e-05, "loss": 1.09, "step": 50000 }, { "epoch": 0.05, "eval_loss": 1.0235236883163452, "eval_runtime": 630.7694, "eval_samples_per_second": 158.537, "eval_steps_per_second": 39.634, "step": 50000 }, { "epoch": 0.05, "learning_rate": 2.699807807807808e-05, "loss": 1.1017, "step": 50500 }, { "epoch": 0.05, "learning_rate": 2.6968048048048047e-05, "loss": 1.1153, "step": 51000 }, { "epoch": 0.06, "learning_rate": 2.693807807807808e-05, "loss": 1.1244, "step": 51500 }, { "epoch": 0.06, "learning_rate": 2.6908108108108112e-05, "loss": 1.1129, "step": 52000 }, { "epoch": 0.06, "learning_rate": 2.6878078078078077e-05, "loss": 1.1032, "step": 52500 }, { "epoch": 0.06, "learning_rate": 2.6848048048048048e-05, "loss": 1.1024, "step": 53000 }, { "epoch": 0.06, "learning_rate": 2.681801801801802e-05, "loss": 1.1119, "step": 53500 }, { "epoch": 0.06, "learning_rate": 2.678798798798799e-05, "loss": 1.0847, "step": 54000 }, { "epoch": 0.06, "learning_rate": 2.675795795795796e-05, "loss": 1.1061, "step": 54500 }, { "epoch": 0.06, "learning_rate": 2.6727927927927927e-05, "loss": 1.0983, "step": 55000 }, { "epoch": 0.06, "learning_rate": 2.6697897897897898e-05, "loss": 1.0909, "step": 55500 }, { "epoch": 0.06, "learning_rate": 2.666786786786787e-05, "loss": 1.1001, "step": 56000 }, { "epoch": 0.06, "learning_rate": 2.66378978978979e-05, "loss": 1.0911, "step": 56500 }, { "epoch": 0.06, "learning_rate": 2.660786786786787e-05, "loss": 1.0993, "step": 57000 }, { "epoch": 0.06, "learning_rate": 2.6577837837837838e-05, "loss": 1.09, "step": 57500 }, { "epoch": 0.06, "learning_rate": 2.654780780780781e-05, "loss": 1.0906, "step": 58000 }, { "epoch": 0.06, "learning_rate": 2.6517897897897897e-05, "loss": 1.089, "step": 58500 }, { "epoch": 0.06, "learning_rate": 2.6487867867867868e-05, "loss": 1.097, "step": 59000 }, { "epoch": 0.06, "learning_rate": 2.645783783783784e-05, "loss": 1.0785, "step": 59500 }, { "epoch": 0.06, "learning_rate": 2.642780780780781e-05, "loss": 1.0867, "step": 60000 }, { "epoch": 0.06, "eval_loss": 1.0042845010757446, "eval_runtime": 612.5588, "eval_samples_per_second": 163.25, "eval_steps_per_second": 40.812, "step": 60000 }, { "epoch": 0.06, "learning_rate": 2.639777777777778e-05, "loss": 1.0828, "step": 60500 }, { "epoch": 0.07, "learning_rate": 2.6367747747747747e-05, "loss": 1.0736, "step": 61000 }, { "epoch": 0.07, "learning_rate": 2.6337717717717718e-05, "loss": 1.0702, "step": 61500 }, { "epoch": 0.07, "learning_rate": 2.630768768768769e-05, "loss": 1.0765, "step": 62000 }, { "epoch": 0.07, "learning_rate": 2.627771771771772e-05, "loss": 1.0801, "step": 62500 }, { "epoch": 0.07, "learning_rate": 2.624768768768769e-05, "loss": 1.0709, "step": 63000 }, { "epoch": 0.07, "learning_rate": 2.621771771771772e-05, "loss": 1.0813, "step": 63500 }, { "epoch": 0.07, "learning_rate": 2.618768768768769e-05, "loss": 1.072, "step": 64000 }, { "epoch": 0.07, "learning_rate": 2.615765765765766e-05, "loss": 1.0789, "step": 64500 }, { "epoch": 0.07, "learning_rate": 2.6127627627627627e-05, "loss": 1.0926, "step": 65000 }, { "epoch": 0.07, "learning_rate": 2.6097597597597598e-05, "loss": 1.0765, "step": 65500 }, { "epoch": 0.07, "learning_rate": 2.606756756756757e-05, "loss": 1.0673, "step": 66000 }, { "epoch": 0.07, "learning_rate": 2.60375975975976e-05, "loss": 1.075, "step": 66500 }, { "epoch": 0.07, "learning_rate": 2.6007567567567567e-05, "loss": 1.0678, "step": 67000 }, { "epoch": 0.07, "learning_rate": 2.5977537537537538e-05, "loss": 1.0645, "step": 67500 }, { "epoch": 0.07, "learning_rate": 2.594750750750751e-05, "loss": 1.0708, "step": 68000 }, { "epoch": 0.07, "learning_rate": 2.591753753753754e-05, "loss": 1.0746, "step": 68500 }, { "epoch": 0.07, "learning_rate": 2.588750750750751e-05, "loss": 1.0644, "step": 69000 }, { "epoch": 0.07, "learning_rate": 2.5857477477477478e-05, "loss": 1.0572, "step": 69500 }, { "epoch": 0.08, "learning_rate": 2.5827447447447446e-05, "loss": 1.0777, "step": 70000 }, { "epoch": 0.08, "eval_loss": 0.989311158657074, "eval_runtime": 626.1693, "eval_samples_per_second": 159.701, "eval_steps_per_second": 39.925, "step": 70000 }, { "epoch": 0.08, "learning_rate": 2.5797477477477475e-05, "loss": 1.0623, "step": 70500 }, { "epoch": 0.08, "learning_rate": 2.5767447447447447e-05, "loss": 1.0469, "step": 71000 }, { "epoch": 0.08, "learning_rate": 2.5737417417417418e-05, "loss": 1.0564, "step": 71500 }, { "epoch": 0.08, "learning_rate": 2.570738738738739e-05, "loss": 1.0434, "step": 72000 }, { "epoch": 0.08, "learning_rate": 2.567735735735736e-05, "loss": 1.0593, "step": 72500 }, { "epoch": 0.08, "learning_rate": 2.5647327327327325e-05, "loss": 1.0587, "step": 73000 }, { "epoch": 0.08, "learning_rate": 2.5617297297297297e-05, "loss": 1.0507, "step": 73500 }, { "epoch": 0.08, "learning_rate": 2.5587267267267268e-05, "loss": 1.057, "step": 74000 }, { "epoch": 0.08, "learning_rate": 2.5557297297297297e-05, "loss": 1.0607, "step": 74500 }, { "epoch": 0.08, "learning_rate": 2.552726726726727e-05, "loss": 1.0645, "step": 75000 }, { "epoch": 0.08, "learning_rate": 2.549723723723724e-05, "loss": 1.0603, "step": 75500 }, { "epoch": 0.08, "learning_rate": 2.5467207207207208e-05, "loss": 1.0407, "step": 76000 }, { "epoch": 0.08, "learning_rate": 2.5437177177177176e-05, "loss": 1.0573, "step": 76500 }, { "epoch": 0.08, "learning_rate": 2.5407147147147148e-05, "loss": 1.0331, "step": 77000 }, { "epoch": 0.08, "learning_rate": 2.5377177177177177e-05, "loss": 1.0456, "step": 77500 }, { "epoch": 0.08, "learning_rate": 2.5347147147147148e-05, "loss": 1.0586, "step": 78000 }, { "epoch": 0.08, "learning_rate": 2.5317117117117116e-05, "loss": 1.0591, "step": 78500 }, { "epoch": 0.08, "learning_rate": 2.5287087087087088e-05, "loss": 1.0519, "step": 79000 }, { "epoch": 0.09, "learning_rate": 2.525705705705706e-05, "loss": 1.0549, "step": 79500 }, { "epoch": 0.09, "learning_rate": 2.5227027027027027e-05, "loss": 1.0588, "step": 80000 }, { "epoch": 0.09, "eval_loss": 0.9711708426475525, "eval_runtime": 621.1495, "eval_samples_per_second": 160.992, "eval_steps_per_second": 40.248, "step": 80000 }, { "epoch": 0.09, "learning_rate": 2.5196996996997e-05, "loss": 1.0534, "step": 80500 }, { "epoch": 0.09, "learning_rate": 2.5166966966966966e-05, "loss": 1.0529, "step": 81000 }, { "epoch": 0.09, "learning_rate": 2.5136996996996996e-05, "loss": 1.0605, "step": 81500 }, { "epoch": 0.09, "learning_rate": 2.5106966966966967e-05, "loss": 1.0639, "step": 82000 }, { "epoch": 0.09, "learning_rate": 2.507693693693694e-05, "loss": 1.0455, "step": 82500 }, { "epoch": 0.09, "learning_rate": 2.504690690690691e-05, "loss": 1.042, "step": 83000 }, { "epoch": 0.09, "learning_rate": 2.501693693693694e-05, "loss": 1.0267, "step": 83500 }, { "epoch": 0.09, "learning_rate": 2.498690690690691e-05, "loss": 1.0534, "step": 84000 }, { "epoch": 0.09, "learning_rate": 2.4956876876876875e-05, "loss": 1.0345, "step": 84500 }, { "epoch": 0.09, "learning_rate": 2.4926906906906908e-05, "loss": 1.0255, "step": 85000 }, { "epoch": 0.09, "learning_rate": 2.4896876876876876e-05, "loss": 1.0277, "step": 85500 }, { "epoch": 0.09, "learning_rate": 2.4866846846846847e-05, "loss": 1.0501, "step": 86000 }, { "epoch": 0.09, "learning_rate": 2.483681681681682e-05, "loss": 1.0417, "step": 86500 }, { "epoch": 0.09, "learning_rate": 2.4806846846846848e-05, "loss": 1.0324, "step": 87000 }, { "epoch": 0.09, "learning_rate": 2.477681681681682e-05, "loss": 1.0552, "step": 87500 }, { "epoch": 0.09, "learning_rate": 2.4746786786786787e-05, "loss": 1.038, "step": 88000 }, { "epoch": 0.09, "learning_rate": 2.471675675675676e-05, "loss": 1.0434, "step": 88500 }, { "epoch": 0.1, "learning_rate": 2.4686726726726726e-05, "loss": 1.0349, "step": 89000 }, { "epoch": 0.1, "learning_rate": 2.4656696696696698e-05, "loss": 1.0166, "step": 89500 }, { "epoch": 0.1, "learning_rate": 2.4626666666666666e-05, "loss": 1.0389, "step": 90000 }, { "epoch": 0.1, "eval_loss": 0.9617297649383545, "eval_runtime": 593.47, "eval_samples_per_second": 168.501, "eval_steps_per_second": 42.125, "step": 90000 }, { "epoch": 0.1, "learning_rate": 2.4596636636636637e-05, "loss": 1.0382, "step": 90500 }, { "epoch": 0.1, "learning_rate": 2.456660660660661e-05, "loss": 1.0389, "step": 91000 }, { "epoch": 0.1, "learning_rate": 2.4536576576576577e-05, "loss": 1.0397, "step": 91500 }, { "epoch": 0.1, "learning_rate": 2.4506546546546548e-05, "loss": 1.028, "step": 92000 }, { "epoch": 0.1, "learning_rate": 2.4476516516516516e-05, "loss": 1.0317, "step": 92500 }, { "epoch": 0.1, "learning_rate": 2.4446546546546545e-05, "loss": 1.0281, "step": 93000 }, { "epoch": 0.1, "learning_rate": 2.4416516516516517e-05, "loss": 1.0385, "step": 93500 }, { "epoch": 0.1, "learning_rate": 2.4386486486486488e-05, "loss": 1.0183, "step": 94000 }, { "epoch": 0.1, "learning_rate": 2.435645645645646e-05, "loss": 1.0204, "step": 94500 }, { "epoch": 0.1, "learning_rate": 2.432648648648649e-05, "loss": 1.044, "step": 95000 }, { "epoch": 0.1, "learning_rate": 2.4296516516516518e-05, "loss": 1.0402, "step": 95500 }, { "epoch": 0.1, "learning_rate": 2.426648648648649e-05, "loss": 1.0346, "step": 96000 }, { "epoch": 0.1, "learning_rate": 2.4236456456456457e-05, "loss": 1.0156, "step": 96500 }, { "epoch": 0.1, "learning_rate": 2.4206426426426425e-05, "loss": 1.0054, "step": 97000 }, { "epoch": 0.1, "learning_rate": 2.4176396396396397e-05, "loss": 1.0276, "step": 97500 }, { "epoch": 0.11, "learning_rate": 2.4146366366366368e-05, "loss": 1.022, "step": 98000 }, { "epoch": 0.11, "learning_rate": 2.4116336336336336e-05, "loss": 1.0351, "step": 98500 }, { "epoch": 0.11, "learning_rate": 2.4086306306306307e-05, "loss": 1.0306, "step": 99000 }, { "epoch": 0.11, "learning_rate": 2.4056276276276275e-05, "loss": 1.0183, "step": 99500 }, { "epoch": 0.11, "learning_rate": 2.4026246246246247e-05, "loss": 1.0245, "step": 100000 }, { "epoch": 0.11, "eval_loss": 0.9445381760597229, "eval_runtime": 618.5347, "eval_samples_per_second": 161.672, "eval_steps_per_second": 40.418, "step": 100000 }, { "epoch": 0.11, "learning_rate": 2.3996216216216218e-05, "loss": 1.0274, "step": 100500 }, { "epoch": 0.11, "learning_rate": 2.3966186186186186e-05, "loss": 1.007, "step": 101000 }, { "epoch": 0.11, "learning_rate": 2.3936276276276276e-05, "loss": 1.021, "step": 101500 }, { "epoch": 0.11, "learning_rate": 2.3906246246246245e-05, "loss": 1.0176, "step": 102000 }, { "epoch": 0.11, "learning_rate": 2.3876216216216216e-05, "loss": 1.0279, "step": 102500 }, { "epoch": 0.11, "learning_rate": 2.3846186186186187e-05, "loss": 1.0051, "step": 103000 }, { "epoch": 0.11, "learning_rate": 2.3816216216216216e-05, "loss": 1.0184, "step": 103500 }, { "epoch": 0.11, "learning_rate": 2.3786186186186188e-05, "loss": 1.0125, "step": 104000 }, { "epoch": 0.11, "learning_rate": 2.375615615615616e-05, "loss": 1.0202, "step": 104500 }, { "epoch": 0.11, "learning_rate": 2.3726126126126124e-05, "loss": 1.0255, "step": 105000 }, { "epoch": 0.11, "learning_rate": 2.3696096096096095e-05, "loss": 0.9965, "step": 105500 }, { "epoch": 0.11, "learning_rate": 2.3666066066066067e-05, "loss": 1.0157, "step": 106000 }, { "epoch": 0.11, "learning_rate": 2.3636036036036038e-05, "loss": 1.0168, "step": 106500 }, { "epoch": 0.11, "learning_rate": 2.360600600600601e-05, "loss": 1.012, "step": 107000 }, { "epoch": 0.12, "learning_rate": 2.3575975975975974e-05, "loss": 1.0102, "step": 107500 }, { "epoch": 0.12, "learning_rate": 2.3545945945945945e-05, "loss": 1.0161, "step": 108000 }, { "epoch": 0.12, "learning_rate": 2.3515915915915917e-05, "loss": 0.9957, "step": 108500 }, { "epoch": 0.12, "learning_rate": 2.3485945945945946e-05, "loss": 1.0009, "step": 109000 }, { "epoch": 0.12, "learning_rate": 2.3455915915915917e-05, "loss": 1.0096, "step": 109500 }, { "epoch": 0.12, "learning_rate": 2.3425885885885885e-05, "loss": 1.0146, "step": 110000 }, { "epoch": 0.12, "eval_loss": 0.934283435344696, "eval_runtime": 601.7192, "eval_samples_per_second": 166.19, "eval_steps_per_second": 41.548, "step": 110000 }, { "epoch": 0.12, "learning_rate": 2.3395975975975976e-05, "loss": 1.0183, "step": 110500 }, { "epoch": 0.12, "learning_rate": 2.3365945945945947e-05, "loss": 1.0182, "step": 111000 }, { "epoch": 0.12, "learning_rate": 2.3335915915915915e-05, "loss": 1.0091, "step": 111500 }, { "epoch": 0.12, "learning_rate": 2.3305885885885887e-05, "loss": 1.0111, "step": 112000 }, { "epoch": 0.12, "learning_rate": 2.3275855855855858e-05, "loss": 1.0015, "step": 112500 }, { "epoch": 0.12, "learning_rate": 2.3245825825825826e-05, "loss": 1.0082, "step": 113000 }, { "epoch": 0.12, "learning_rate": 2.3215795795795794e-05, "loss": 0.9918, "step": 113500 }, { "epoch": 0.12, "learning_rate": 2.3185765765765765e-05, "loss": 1.0009, "step": 114000 }, { "epoch": 0.12, "learning_rate": 2.3155735735735737e-05, "loss": 1.0195, "step": 114500 }, { "epoch": 0.12, "learning_rate": 2.3125705705705708e-05, "loss": 1.0189, "step": 115000 }, { "epoch": 0.12, "learning_rate": 2.3095675675675676e-05, "loss": 1.0038, "step": 115500 }, { "epoch": 0.12, "learning_rate": 2.3065645645645644e-05, "loss": 0.9963, "step": 116000 }, { "epoch": 0.12, "learning_rate": 2.3035615615615616e-05, "loss": 1.0125, "step": 116500 }, { "epoch": 0.13, "learning_rate": 2.3005585585585587e-05, "loss": 0.988, "step": 117000 }, { "epoch": 0.13, "learning_rate": 2.297555555555556e-05, "loss": 0.9997, "step": 117500 }, { "epoch": 0.13, "learning_rate": 2.2945525525525523e-05, "loss": 0.9983, "step": 118000 }, { "epoch": 0.13, "learning_rate": 2.291555555555556e-05, "loss": 1.0094, "step": 118500 }, { "epoch": 0.13, "learning_rate": 2.2885585585585588e-05, "loss": 0.9988, "step": 119000 }, { "epoch": 0.13, "learning_rate": 2.2855555555555556e-05, "loss": 0.9928, "step": 119500 }, { "epoch": 0.13, "learning_rate": 2.2825525525525524e-05, "loss": 0.9951, "step": 120000 }, { "epoch": 0.13, "eval_loss": 0.9260998964309692, "eval_runtime": 605.8053, "eval_samples_per_second": 165.07, "eval_steps_per_second": 41.267, "step": 120000 }, { "epoch": 0.13, "learning_rate": 2.2795495495495496e-05, "loss": 0.9923, "step": 120500 }, { "epoch": 0.13, "learning_rate": 2.2765465465465467e-05, "loss": 1.0081, "step": 121000 }, { "epoch": 0.13, "learning_rate": 2.2735495495495496e-05, "loss": 1.0009, "step": 121500 }, { "epoch": 0.13, "learning_rate": 2.2705465465465464e-05, "loss": 0.9929, "step": 122000 }, { "epoch": 0.13, "learning_rate": 2.2675435435435436e-05, "loss": 0.9956, "step": 122500 }, { "epoch": 0.13, "learning_rate": 2.2645405405405407e-05, "loss": 1.0, "step": 123000 }, { "epoch": 0.13, "learning_rate": 2.2615435435435436e-05, "loss": 0.9927, "step": 123500 }, { "epoch": 0.13, "learning_rate": 2.2585405405405408e-05, "loss": 0.9882, "step": 124000 }, { "epoch": 0.13, "learning_rate": 2.2555375375375376e-05, "loss": 0.9915, "step": 124500 }, { "epoch": 0.13, "learning_rate": 2.2525345345345344e-05, "loss": 0.9897, "step": 125000 }, { "epoch": 0.13, "learning_rate": 2.2495315315315315e-05, "loss": 1.0072, "step": 125500 }, { "epoch": 0.14, "learning_rate": 2.2465285285285286e-05, "loss": 0.9896, "step": 126000 }, { "epoch": 0.14, "learning_rate": 2.2435315315315316e-05, "loss": 0.9879, "step": 126500 }, { "epoch": 0.14, "learning_rate": 2.2405285285285287e-05, "loss": 0.9952, "step": 127000 }, { "epoch": 0.14, "learning_rate": 2.237525525525526e-05, "loss": 0.9748, "step": 127500 }, { "epoch": 0.14, "learning_rate": 2.2345225225225223e-05, "loss": 0.9967, "step": 128000 }, { "epoch": 0.14, "learning_rate": 2.2315195195195194e-05, "loss": 1.0016, "step": 128500 }, { "epoch": 0.14, "learning_rate": 2.2285165165165166e-05, "loss": 0.9801, "step": 129000 }, { "epoch": 0.14, "learning_rate": 2.2255135135135137e-05, "loss": 0.9876, "step": 129500 }, { "epoch": 0.14, "learning_rate": 2.2225165165165166e-05, "loss": 0.978, "step": 130000 }, { "epoch": 0.14, "eval_loss": 0.9173310399055481, "eval_runtime": 602.5766, "eval_samples_per_second": 165.954, "eval_steps_per_second": 41.489, "step": 130000 }, { "epoch": 0.14, "learning_rate": 2.2195135135135138e-05, "loss": 0.9867, "step": 130500 }, { "epoch": 0.14, "learning_rate": 2.2165105105105106e-05, "loss": 0.9824, "step": 131000 }, { "epoch": 0.14, "learning_rate": 2.2135075075075074e-05, "loss": 0.9877, "step": 131500 }, { "epoch": 0.14, "learning_rate": 2.2105045045045045e-05, "loss": 0.9828, "step": 132000 }, { "epoch": 0.14, "learning_rate": 2.2075015015015017e-05, "loss": 0.9772, "step": 132500 }, { "epoch": 0.14, "learning_rate": 2.2044984984984985e-05, "loss": 0.9906, "step": 133000 }, { "epoch": 0.14, "learning_rate": 2.2015015015015014e-05, "loss": 0.9847, "step": 133500 }, { "epoch": 0.14, "learning_rate": 2.1984984984984985e-05, "loss": 0.9826, "step": 134000 }, { "epoch": 0.14, "learning_rate": 2.195507507507508e-05, "loss": 0.9903, "step": 134500 }, { "epoch": 0.14, "learning_rate": 2.1925045045045043e-05, "loss": 0.9907, "step": 135000 }, { "epoch": 0.15, "learning_rate": 2.1895015015015015e-05, "loss": 0.9909, "step": 135500 }, { "epoch": 0.15, "learning_rate": 2.1864984984984986e-05, "loss": 0.9906, "step": 136000 }, { "epoch": 0.15, "learning_rate": 2.1834954954954958e-05, "loss": 0.9933, "step": 136500 }, { "epoch": 0.15, "learning_rate": 2.1804924924924922e-05, "loss": 0.9754, "step": 137000 }, { "epoch": 0.15, "learning_rate": 2.1774894894894894e-05, "loss": 0.9774, "step": 137500 }, { "epoch": 0.15, "learning_rate": 2.1744864864864865e-05, "loss": 0.9889, "step": 138000 }, { "epoch": 0.15, "learning_rate": 2.1714834834834836e-05, "loss": 0.9972, "step": 138500 }, { "epoch": 0.15, "learning_rate": 2.1684804804804808e-05, "loss": 0.9852, "step": 139000 }, { "epoch": 0.15, "learning_rate": 2.1654834834834837e-05, "loss": 0.9615, "step": 139500 }, { "epoch": 0.15, "learning_rate": 2.1624804804804805e-05, "loss": 0.9865, "step": 140000 }, { "epoch": 0.15, "eval_loss": 0.909109354019165, "eval_runtime": 589.6128, "eval_samples_per_second": 169.603, "eval_steps_per_second": 42.401, "step": 140000 }, { "epoch": 0.15, "learning_rate": 2.1594774774774773e-05, "loss": 0.9732, "step": 140500 }, { "epoch": 0.15, "learning_rate": 2.1564744744744744e-05, "loss": 0.9657, "step": 141000 }, { "epoch": 0.15, "learning_rate": 2.1534714714714716e-05, "loss": 0.9651, "step": 141500 }, { "epoch": 0.15, "learning_rate": 2.1504684684684687e-05, "loss": 0.9913, "step": 142000 }, { "epoch": 0.15, "learning_rate": 2.1474654654654655e-05, "loss": 0.9685, "step": 142500 }, { "epoch": 0.15, "learning_rate": 2.1444624624624623e-05, "loss": 0.9663, "step": 143000 }, { "epoch": 0.15, "learning_rate": 2.1414594594594595e-05, "loss": 0.9743, "step": 143500 }, { "epoch": 0.15, "learning_rate": 2.1384564564564566e-05, "loss": 0.9723, "step": 144000 }, { "epoch": 0.15, "learning_rate": 2.1354534534534534e-05, "loss": 0.9815, "step": 144500 }, { "epoch": 0.16, "learning_rate": 2.1324564564564563e-05, "loss": 0.9714, "step": 145000 }, { "epoch": 0.16, "learning_rate": 2.1294534534534535e-05, "loss": 0.9734, "step": 145500 }, { "epoch": 0.16, "learning_rate": 2.1264504504504506e-05, "loss": 0.9679, "step": 146000 }, { "epoch": 0.16, "learning_rate": 2.1234474474474474e-05, "loss": 0.9697, "step": 146500 }, { "epoch": 0.16, "learning_rate": 2.1204444444444445e-05, "loss": 0.9941, "step": 147000 }, { "epoch": 0.16, "learning_rate": 2.1174474474474475e-05, "loss": 0.9786, "step": 147500 }, { "epoch": 0.16, "learning_rate": 2.1144444444444443e-05, "loss": 0.97, "step": 148000 }, { "epoch": 0.16, "learning_rate": 2.1114414414414414e-05, "loss": 0.9672, "step": 148500 }, { "epoch": 0.16, "learning_rate": 2.1084384384384385e-05, "loss": 0.9661, "step": 149000 }, { "epoch": 0.16, "learning_rate": 2.1054354354354357e-05, "loss": 0.9867, "step": 149500 }, { "epoch": 0.16, "learning_rate": 2.1024324324324325e-05, "loss": 0.972, "step": 150000 }, { "epoch": 0.16, "eval_loss": 0.8991417288780212, "eval_runtime": 602.234, "eval_samples_per_second": 166.048, "eval_steps_per_second": 41.512, "step": 150000 }, { "epoch": 0.16, "learning_rate": 2.0994294294294293e-05, "loss": 0.9681, "step": 150500 }, { "epoch": 0.16, "learning_rate": 2.0964264264264264e-05, "loss": 0.9601, "step": 151000 }, { "epoch": 0.16, "learning_rate": 2.0934294294294293e-05, "loss": 0.955, "step": 151500 }, { "epoch": 0.16, "learning_rate": 2.0904264264264265e-05, "loss": 0.9703, "step": 152000 }, { "epoch": 0.16, "learning_rate": 2.0874234234234236e-05, "loss": 0.9535, "step": 152500 }, { "epoch": 0.16, "learning_rate": 2.0844204204204204e-05, "loss": 0.968, "step": 153000 }, { "epoch": 0.16, "learning_rate": 2.0814174174174172e-05, "loss": 0.9737, "step": 153500 }, { "epoch": 0.17, "learning_rate": 2.0784204204204205e-05, "loss": 0.9714, "step": 154000 }, { "epoch": 0.17, "learning_rate": 2.0754174174174173e-05, "loss": 0.9737, "step": 154500 }, { "epoch": 0.17, "learning_rate": 2.0724144144144144e-05, "loss": 0.955, "step": 155000 }, { "epoch": 0.17, "learning_rate": 2.0694114114114116e-05, "loss": 0.9677, "step": 155500 }, { "epoch": 0.17, "learning_rate": 2.0664084084084084e-05, "loss": 0.964, "step": 156000 }, { "epoch": 0.17, "learning_rate": 2.0634054054054055e-05, "loss": 0.9675, "step": 156500 }, { "epoch": 0.17, "learning_rate": 2.0604024024024023e-05, "loss": 0.9705, "step": 157000 }, { "epoch": 0.17, "learning_rate": 2.0573993993993994e-05, "loss": 0.9634, "step": 157500 }, { "epoch": 0.17, "learning_rate": 2.0544024024024024e-05, "loss": 0.9583, "step": 158000 }, { "epoch": 0.17, "learning_rate": 2.0513993993993995e-05, "loss": 0.9614, "step": 158500 }, { "epoch": 0.17, "learning_rate": 2.0483963963963963e-05, "loss": 0.9707, "step": 159000 }, { "epoch": 0.17, "learning_rate": 2.0453933933933934e-05, "loss": 0.95, "step": 159500 }, { "epoch": 0.17, "learning_rate": 2.0423903903903906e-05, "loss": 0.9673, "step": 160000 }, { "epoch": 0.17, "eval_loss": 0.8937882781028748, "eval_runtime": 585.5551, "eval_samples_per_second": 170.778, "eval_steps_per_second": 42.695, "step": 160000 }, { "epoch": 0.17, "learning_rate": 2.0393873873873874e-05, "loss": 0.9509, "step": 160500 }, { "epoch": 0.17, "learning_rate": 2.0363843843843842e-05, "loss": 0.9537, "step": 161000 }, { "epoch": 0.17, "learning_rate": 2.0333813813813813e-05, "loss": 0.9528, "step": 161500 }, { "epoch": 0.17, "learning_rate": 2.0303903903903907e-05, "loss": 0.958, "step": 162000 }, { "epoch": 0.17, "learning_rate": 2.0273873873873875e-05, "loss": 0.9773, "step": 162500 }, { "epoch": 0.17, "learning_rate": 2.0243843843843843e-05, "loss": 0.963, "step": 163000 }, { "epoch": 0.18, "learning_rate": 2.0213813813813814e-05, "loss": 0.9617, "step": 163500 }, { "epoch": 0.18, "learning_rate": 2.0183843843843844e-05, "loss": 0.9572, "step": 164000 }, { "epoch": 0.18, "learning_rate": 2.0153813813813815e-05, "loss": 0.9535, "step": 164500 }, { "epoch": 0.18, "learning_rate": 2.0123783783783783e-05, "loss": 0.9599, "step": 165000 }, { "epoch": 0.18, "learning_rate": 2.0093813813813816e-05, "loss": 0.9612, "step": 165500 }, { "epoch": 0.18, "learning_rate": 2.0063783783783784e-05, "loss": 0.9474, "step": 166000 }, { "epoch": 0.18, "learning_rate": 2.0033753753753755e-05, "loss": 0.9534, "step": 166500 }, { "epoch": 0.18, "learning_rate": 2.0003723723723726e-05, "loss": 0.9509, "step": 167000 }, { "epoch": 0.18, "learning_rate": 1.9973693693693694e-05, "loss": 0.955, "step": 167500 }, { "epoch": 0.18, "learning_rate": 1.9943663663663662e-05, "loss": 0.9719, "step": 168000 }, { "epoch": 0.18, "learning_rate": 1.9913633633633634e-05, "loss": 0.9597, "step": 168500 }, { "epoch": 0.18, "learning_rate": 1.9883603603603605e-05, "loss": 0.9484, "step": 169000 }, { "epoch": 0.18, "learning_rate": 1.9853573573573577e-05, "loss": 0.9539, "step": 169500 }, { "epoch": 0.18, "learning_rate": 1.9823543543543545e-05, "loss": 0.9493, "step": 170000 }, { "epoch": 0.18, "eval_loss": 0.8906692266464233, "eval_runtime": 620.1154, "eval_samples_per_second": 161.26, "eval_steps_per_second": 40.315, "step": 170000 }, { "epoch": 0.18, "learning_rate": 1.9793513513513513e-05, "loss": 0.9523, "step": 170500 }, { "epoch": 0.18, "learning_rate": 1.9763483483483484e-05, "loss": 0.9462, "step": 171000 }, { "epoch": 0.18, "learning_rate": 1.9733453453453455e-05, "loss": 0.9438, "step": 171500 }, { "epoch": 0.18, "learning_rate": 1.9703423423423423e-05, "loss": 0.9407, "step": 172000 }, { "epoch": 0.18, "learning_rate": 1.967339339339339e-05, "loss": 0.9508, "step": 172500 }, { "epoch": 0.19, "learning_rate": 1.9643363363363363e-05, "loss": 0.9544, "step": 173000 }, { "epoch": 0.19, "learning_rate": 1.9613393393393392e-05, "loss": 0.9576, "step": 173500 }, { "epoch": 0.19, "learning_rate": 1.9583363363363363e-05, "loss": 0.9528, "step": 174000 }, { "epoch": 0.19, "learning_rate": 1.9553333333333335e-05, "loss": 0.9481, "step": 174500 }, { "epoch": 0.19, "learning_rate": 1.9523363363363364e-05, "loss": 0.9393, "step": 175000 }, { "epoch": 0.19, "learning_rate": 1.9493333333333335e-05, "loss": 0.9377, "step": 175500 }, { "epoch": 0.19, "learning_rate": 1.9463303303303303e-05, "loss": 0.9342, "step": 176000 }, { "epoch": 0.19, "learning_rate": 1.9433273273273275e-05, "loss": 0.9309, "step": 176500 }, { "epoch": 0.19, "learning_rate": 1.9403243243243243e-05, "loss": 0.9607, "step": 177000 }, { "epoch": 0.19, "learning_rate": 1.9373213213213214e-05, "loss": 0.9508, "step": 177500 }, { "epoch": 0.19, "learning_rate": 1.9343183183183186e-05, "loss": 0.9361, "step": 178000 }, { "epoch": 0.19, "learning_rate": 1.9313153153153154e-05, "loss": 0.9312, "step": 178500 }, { "epoch": 0.19, "learning_rate": 1.9283183183183183e-05, "loss": 0.9503, "step": 179000 }, { "epoch": 0.19, "learning_rate": 1.9253153153153154e-05, "loss": 0.9509, "step": 179500 }, { "epoch": 0.19, "learning_rate": 1.9223183183183183e-05, "loss": 0.9311, "step": 180000 }, { "epoch": 0.19, "eval_loss": 0.8821685910224915, "eval_runtime": 603.0834, "eval_samples_per_second": 165.815, "eval_steps_per_second": 41.454, "step": 180000 }, { "epoch": 0.19, "learning_rate": 1.9193153153153155e-05, "loss": 0.95, "step": 180500 }, { "epoch": 0.19, "learning_rate": 1.9163123123123126e-05, "loss": 0.9439, "step": 181000 }, { "epoch": 0.19, "learning_rate": 1.9133093093093094e-05, "loss": 0.9397, "step": 181500 }, { "epoch": 0.2, "learning_rate": 1.9103063063063062e-05, "loss": 0.9353, "step": 182000 }, { "epoch": 0.2, "learning_rate": 1.9073033033033033e-05, "loss": 0.9464, "step": 182500 }, { "epoch": 0.2, "learning_rate": 1.9043003003003005e-05, "loss": 0.9346, "step": 183000 }, { "epoch": 0.2, "learning_rate": 1.9012972972972976e-05, "loss": 0.9596, "step": 183500 }, { "epoch": 0.2, "learning_rate": 1.8983003003003005e-05, "loss": 0.9653, "step": 184000 }, { "epoch": 0.2, "learning_rate": 1.8952972972972973e-05, "loss": 0.9459, "step": 184500 }, { "epoch": 0.2, "learning_rate": 1.8923003003003006e-05, "loss": 0.9249, "step": 185000 }, { "epoch": 0.2, "learning_rate": 1.8892972972972974e-05, "loss": 0.9413, "step": 185500 }, { "epoch": 0.2, "learning_rate": 1.8862942942942942e-05, "loss": 0.943, "step": 186000 }, { "epoch": 0.2, "learning_rate": 1.8832912912912913e-05, "loss": 0.947, "step": 186500 }, { "epoch": 0.2, "learning_rate": 1.8802882882882885e-05, "loss": 0.9567, "step": 187000 }, { "epoch": 0.2, "learning_rate": 1.8772912912912914e-05, "loss": 0.9314, "step": 187500 }, { "epoch": 0.2, "learning_rate": 1.8742882882882882e-05, "loss": 0.9512, "step": 188000 }, { "epoch": 0.2, "learning_rate": 1.8712852852852853e-05, "loss": 0.9505, "step": 188500 }, { "epoch": 0.2, "learning_rate": 1.8682822822822825e-05, "loss": 0.9338, "step": 189000 }, { "epoch": 0.2, "learning_rate": 1.8652792792792793e-05, "loss": 0.9395, "step": 189500 }, { "epoch": 0.2, "learning_rate": 1.8622762762762764e-05, "loss": 0.9462, "step": 190000 }, { "epoch": 0.2, "eval_loss": 0.8739633560180664, "eval_runtime": 600.0958, "eval_samples_per_second": 166.64, "eval_steps_per_second": 41.66, "step": 190000 }, { "epoch": 0.2, "learning_rate": 1.8592732732732732e-05, "loss": 0.9298, "step": 190500 }, { "epoch": 0.2, "learning_rate": 1.8562702702702704e-05, "loss": 0.9386, "step": 191000 }, { "epoch": 0.21, "learning_rate": 1.8532732732732733e-05, "loss": 0.937, "step": 191500 }, { "epoch": 0.21, "learning_rate": 1.8502702702702704e-05, "loss": 0.9436, "step": 192000 }, { "epoch": 0.21, "learning_rate": 1.8472672672672676e-05, "loss": 0.9416, "step": 192500 }, { "epoch": 0.21, "learning_rate": 1.8442642642642644e-05, "loss": 0.9432, "step": 193000 }, { "epoch": 0.21, "learning_rate": 1.841261261261261e-05, "loss": 0.9358, "step": 193500 }, { "epoch": 0.21, "learning_rate": 1.8382582582582583e-05, "loss": 0.9322, "step": 194000 }, { "epoch": 0.21, "learning_rate": 1.8352552552552554e-05, "loss": 0.9325, "step": 194500 }, { "epoch": 0.21, "learning_rate": 1.8322522522522526e-05, "loss": 0.9309, "step": 195000 }, { "epoch": 0.21, "learning_rate": 1.8292552552552555e-05, "loss": 0.9439, "step": 195500 }, { "epoch": 0.21, "learning_rate": 1.8262522522522523e-05, "loss": 0.9186, "step": 196000 }, { "epoch": 0.21, "learning_rate": 1.823249249249249e-05, "loss": 0.9252, "step": 196500 }, { "epoch": 0.21, "learning_rate": 1.8202462462462462e-05, "loss": 0.9458, "step": 197000 }, { "epoch": 0.21, "learning_rate": 1.817249249249249e-05, "loss": 0.9415, "step": 197500 }, { "epoch": 0.21, "learning_rate": 1.8142462462462463e-05, "loss": 0.9387, "step": 198000 }, { "epoch": 0.21, "learning_rate": 1.8112432432432434e-05, "loss": 0.922, "step": 198500 }, { "epoch": 0.21, "learning_rate": 1.8082462462462464e-05, "loss": 0.9299, "step": 199000 }, { "epoch": 0.21, "learning_rate": 1.805243243243243e-05, "loss": 0.9296, "step": 199500 }, { "epoch": 0.21, "learning_rate": 1.8022402402402403e-05, "loss": 0.9302, "step": 200000 }, { "epoch": 0.21, "eval_loss": 0.868446946144104, "eval_runtime": 599.5829, "eval_samples_per_second": 166.783, "eval_steps_per_second": 41.696, "step": 200000 }, { "epoch": 0.22, "learning_rate": 1.7992372372372374e-05, "loss": 0.9236, "step": 200500 }, { "epoch": 0.22, "learning_rate": 1.7962342342342342e-05, "loss": 0.9376, "step": 201000 }, { "epoch": 0.22, "learning_rate": 1.7932312312312314e-05, "loss": 0.9373, "step": 201500 }, { "epoch": 0.22, "learning_rate": 1.7902342342342343e-05, "loss": 0.9363, "step": 202000 }, { "epoch": 0.22, "learning_rate": 1.787231231231231e-05, "loss": 0.9386, "step": 202500 }, { "epoch": 0.22, "learning_rate": 1.7842282282282282e-05, "loss": 0.9139, "step": 203000 }, { "epoch": 0.22, "learning_rate": 1.7812252252252254e-05, "loss": 0.9403, "step": 203500 }, { "epoch": 0.22, "learning_rate": 1.7782282282282283e-05, "loss": 0.9184, "step": 204000 }, { "epoch": 0.22, "learning_rate": 1.7752252252252254e-05, "loss": 0.9281, "step": 204500 }, { "epoch": 0.22, "learning_rate": 1.7722222222222226e-05, "loss": 0.9358, "step": 205000 }, { "epoch": 0.22, "learning_rate": 1.769219219219219e-05, "loss": 0.9337, "step": 205500 }, { "epoch": 0.22, "learning_rate": 1.7662162162162162e-05, "loss": 0.9295, "step": 206000 }, { "epoch": 0.22, "learning_rate": 1.7632132132132133e-05, "loss": 0.931, "step": 206500 }, { "epoch": 0.22, "learning_rate": 1.7602102102102105e-05, "loss": 0.9348, "step": 207000 }, { "epoch": 0.22, "learning_rate": 1.7572072072072073e-05, "loss": 0.9153, "step": 207500 }, { "epoch": 0.22, "learning_rate": 1.7542102102102102e-05, "loss": 0.9248, "step": 208000 }, { "epoch": 0.22, "learning_rate": 1.7512072072072073e-05, "loss": 0.9298, "step": 208500 }, { "epoch": 0.22, "learning_rate": 1.748204204204204e-05, "loss": 0.929, "step": 209000 }, { "epoch": 0.22, "learning_rate": 1.7452012012012013e-05, "loss": 0.9327, "step": 209500 }, { "epoch": 0.23, "learning_rate": 1.7421981981981984e-05, "loss": 0.921, "step": 210000 }, { "epoch": 0.23, "eval_loss": 0.8622388243675232, "eval_runtime": 575.3524, "eval_samples_per_second": 173.807, "eval_steps_per_second": 43.452, "step": 210000 }, { "epoch": 0.23, "learning_rate": 1.7392012012012013e-05, "loss": 0.9248, "step": 210500 }, { "epoch": 0.23, "learning_rate": 1.736198198198198e-05, "loss": 0.9119, "step": 211000 }, { "epoch": 0.23, "learning_rate": 1.7331951951951952e-05, "loss": 0.9253, "step": 211500 }, { "epoch": 0.23, "learning_rate": 1.7301921921921924e-05, "loss": 0.9215, "step": 212000 }, { "epoch": 0.23, "learning_rate": 1.7271891891891892e-05, "loss": 0.947, "step": 212500 }, { "epoch": 0.23, "learning_rate": 1.7241861861861863e-05, "loss": 0.9279, "step": 213000 }, { "epoch": 0.23, "learning_rate": 1.721183183183183e-05, "loss": 0.9202, "step": 213500 }, { "epoch": 0.23, "learning_rate": 1.7181801801801803e-05, "loss": 0.9283, "step": 214000 }, { "epoch": 0.23, "learning_rate": 1.7151831831831832e-05, "loss": 0.917, "step": 214500 }, { "epoch": 0.23, "learning_rate": 1.712186186186186e-05, "loss": 0.9394, "step": 215000 }, { "epoch": 0.23, "learning_rate": 1.7091831831831832e-05, "loss": 0.9431, "step": 215500 }, { "epoch": 0.23, "learning_rate": 1.7061801801801804e-05, "loss": 0.9241, "step": 216000 }, { "epoch": 0.23, "learning_rate": 1.7031771771771775e-05, "loss": 0.9305, "step": 216500 }, { "epoch": 0.23, "learning_rate": 1.700174174174174e-05, "loss": 0.9231, "step": 217000 }, { "epoch": 0.23, "learning_rate": 1.6971771771771772e-05, "loss": 0.9238, "step": 217500 }, { "epoch": 0.23, "learning_rate": 1.694174174174174e-05, "loss": 0.9168, "step": 218000 }, { "epoch": 0.23, "learning_rate": 1.6911771771771773e-05, "loss": 0.9201, "step": 218500 }, { "epoch": 0.23, "learning_rate": 1.688174174174174e-05, "loss": 0.9141, "step": 219000 }, { "epoch": 0.24, "learning_rate": 1.6851771771771774e-05, "loss": 0.9076, "step": 219500 }, { "epoch": 0.24, "learning_rate": 1.682174174174174e-05, "loss": 0.9198, "step": 220000 }, { "epoch": 0.24, "eval_loss": 0.85796719789505, "eval_runtime": 609.8643, "eval_samples_per_second": 163.971, "eval_steps_per_second": 40.993, "step": 220000 }, { "epoch": 0.24, "learning_rate": 1.6791711711711713e-05, "loss": 0.9256, "step": 220500 }, { "epoch": 0.24, "learning_rate": 1.676168168168168e-05, "loss": 0.9195, "step": 221000 }, { "epoch": 0.24, "learning_rate": 1.6731651651651652e-05, "loss": 0.916, "step": 221500 }, { "epoch": 0.24, "learning_rate": 1.6701621621621624e-05, "loss": 0.9283, "step": 222000 }, { "epoch": 0.24, "learning_rate": 1.6671591591591592e-05, "loss": 0.912, "step": 222500 }, { "epoch": 0.24, "learning_rate": 1.664156156156156e-05, "loss": 0.9138, "step": 223000 }, { "epoch": 0.24, "learning_rate": 1.661153153153153e-05, "loss": 0.924, "step": 223500 }, { "epoch": 0.24, "learning_rate": 1.6581501501501503e-05, "loss": 0.9147, "step": 224000 }, { "epoch": 0.24, "learning_rate": 1.6551471471471474e-05, "loss": 0.9228, "step": 224500 }, { "epoch": 0.24, "learning_rate": 1.6521441441441442e-05, "loss": 0.9203, "step": 225000 }, { "epoch": 0.24, "learning_rate": 1.649141141141141e-05, "loss": 0.9118, "step": 225500 }, { "epoch": 0.24, "learning_rate": 1.646138138138138e-05, "loss": 0.9151, "step": 226000 }, { "epoch": 0.24, "learning_rate": 1.6431351351351353e-05, "loss": 0.9149, "step": 226500 }, { "epoch": 0.24, "learning_rate": 1.6401321321321324e-05, "loss": 0.9162, "step": 227000 }, { "epoch": 0.24, "learning_rate": 1.6371471471471472e-05, "loss": 0.9191, "step": 227500 }, { "epoch": 0.24, "learning_rate": 1.63415015015015e-05, "loss": 0.9146, "step": 228000 }, { "epoch": 0.25, "learning_rate": 1.6311471471471473e-05, "loss": 0.9198, "step": 228500 }, { "epoch": 0.25, "learning_rate": 1.628144144144144e-05, "loss": 0.9175, "step": 229000 }, { "epoch": 0.25, "learning_rate": 1.6251411411411412e-05, "loss": 0.9145, "step": 229500 }, { "epoch": 0.25, "learning_rate": 1.622138138138138e-05, "loss": 0.9115, "step": 230000 }, { "epoch": 0.25, "eval_loss": 0.8521190881729126, "eval_runtime": 576.8956, "eval_samples_per_second": 173.342, "eval_steps_per_second": 43.335, "step": 230000 }, { "epoch": 0.25, "learning_rate": 1.619141141141141e-05, "loss": 0.9222, "step": 230500 }, { "epoch": 0.25, "learning_rate": 1.616138138138138e-05, "loss": 0.9218, "step": 231000 }, { "epoch": 0.25, "learning_rate": 1.6131351351351352e-05, "loss": 0.9173, "step": 231500 }, { "epoch": 0.25, "learning_rate": 1.6101321321321324e-05, "loss": 0.9215, "step": 232000 }, { "epoch": 0.25, "learning_rate": 1.6071291291291292e-05, "loss": 0.905, "step": 232500 }, { "epoch": 0.25, "learning_rate": 1.604126126126126e-05, "loss": 0.9172, "step": 233000 }, { "epoch": 0.25, "learning_rate": 1.601123123123123e-05, "loss": 0.9092, "step": 233500 }, { "epoch": 0.25, "learning_rate": 1.5981201201201203e-05, "loss": 0.9137, "step": 234000 }, { "epoch": 0.25, "learning_rate": 1.5951171171171174e-05, "loss": 0.9082, "step": 234500 }, { "epoch": 0.25, "learning_rate": 1.592114114114114e-05, "loss": 0.9221, "step": 235000 }, { "epoch": 0.25, "learning_rate": 1.589111111111111e-05, "loss": 0.9057, "step": 235500 }, { "epoch": 0.25, "learning_rate": 1.586114114114114e-05, "loss": 0.9145, "step": 236000 }, { "epoch": 0.25, "learning_rate": 1.583111111111111e-05, "loss": 0.9046, "step": 236500 }, { "epoch": 0.25, "learning_rate": 1.5801081081081082e-05, "loss": 0.8992, "step": 237000 }, { "epoch": 0.25, "learning_rate": 1.5771051051051053e-05, "loss": 0.9011, "step": 237500 }, { "epoch": 0.26, "learning_rate": 1.574102102102102e-05, "loss": 0.9066, "step": 238000 }, { "epoch": 0.26, "learning_rate": 1.571099099099099e-05, "loss": 0.9076, "step": 238500 }, { "epoch": 0.26, "learning_rate": 1.568096096096096e-05, "loss": 0.8947, "step": 239000 }, { "epoch": 0.26, "learning_rate": 1.5650930930930932e-05, "loss": 0.8981, "step": 239500 }, { "epoch": 0.26, "learning_rate": 1.5620900900900904e-05, "loss": 0.9177, "step": 240000 }, { "epoch": 0.26, "eval_loss": 0.8470381498336792, "eval_runtime": 609.4393, "eval_samples_per_second": 164.085, "eval_steps_per_second": 41.021, "step": 240000 }, { "epoch": 0.26, "learning_rate": 1.559087087087087e-05, "loss": 0.9091, "step": 240500 }, { "epoch": 0.26, "learning_rate": 1.556084084084084e-05, "loss": 0.9154, "step": 241000 }, { "epoch": 0.26, "learning_rate": 1.553081081081081e-05, "loss": 0.9091, "step": 241500 }, { "epoch": 0.26, "learning_rate": 1.5500780780780782e-05, "loss": 0.9216, "step": 242000 }, { "epoch": 0.26, "learning_rate": 1.547075075075075e-05, "loss": 0.9147, "step": 242500 }, { "epoch": 0.26, "learning_rate": 1.5440720720720722e-05, "loss": 0.9052, "step": 243000 }, { "epoch": 0.26, "learning_rate": 1.541069069069069e-05, "loss": 0.9056, "step": 243500 }, { "epoch": 0.26, "learning_rate": 1.5380720720720722e-05, "loss": 0.9149, "step": 244000 }, { "epoch": 0.26, "learning_rate": 1.535069069069069e-05, "loss": 0.9082, "step": 244500 }, { "epoch": 0.26, "learning_rate": 1.5320660660660662e-05, "loss": 0.9136, "step": 245000 }, { "epoch": 0.26, "learning_rate": 1.529069069069069e-05, "loss": 0.9214, "step": 245500 }, { "epoch": 0.26, "learning_rate": 1.526066066066066e-05, "loss": 0.9158, "step": 246000 }, { "epoch": 0.26, "learning_rate": 1.523063063063063e-05, "loss": 0.8907, "step": 246500 }, { "epoch": 0.26, "learning_rate": 1.520066066066066e-05, "loss": 0.8975, "step": 247000 }, { "epoch": 0.27, "learning_rate": 1.5170630630630631e-05, "loss": 0.8965, "step": 247500 }, { "epoch": 0.27, "learning_rate": 1.5140600600600602e-05, "loss": 0.8937, "step": 248000 }, { "epoch": 0.27, "learning_rate": 1.5110570570570572e-05, "loss": 0.8949, "step": 248500 }, { "epoch": 0.27, "learning_rate": 1.508054054054054e-05, "loss": 0.9091, "step": 249000 }, { "epoch": 0.27, "learning_rate": 1.505051051051051e-05, "loss": 0.9114, "step": 249500 }, { "epoch": 0.27, "learning_rate": 1.5020480480480481e-05, "loss": 0.8959, "step": 250000 }, { "epoch": 0.27, "eval_loss": 0.8428720235824585, "eval_runtime": 586.0492, "eval_samples_per_second": 170.634, "eval_steps_per_second": 42.659, "step": 250000 }, { "epoch": 0.27, "learning_rate": 1.4990510510510512e-05, "loss": 0.904, "step": 250500 }, { "epoch": 0.27, "learning_rate": 1.496048048048048e-05, "loss": 0.9161, "step": 251000 }, { "epoch": 0.27, "learning_rate": 1.4930450450450451e-05, "loss": 0.8939, "step": 251500 }, { "epoch": 0.27, "learning_rate": 1.4900420420420421e-05, "loss": 0.9136, "step": 252000 }, { "epoch": 0.27, "learning_rate": 1.487039039039039e-05, "loss": 0.9042, "step": 252500 }, { "epoch": 0.27, "learning_rate": 1.484036036036036e-05, "loss": 0.9086, "step": 253000 }, { "epoch": 0.27, "learning_rate": 1.481033033033033e-05, "loss": 0.9049, "step": 253500 }, { "epoch": 0.27, "learning_rate": 1.4780300300300302e-05, "loss": 0.9066, "step": 254000 }, { "epoch": 0.27, "learning_rate": 1.475027027027027e-05, "loss": 0.8934, "step": 254500 }, { "epoch": 0.27, "learning_rate": 1.4720240240240241e-05, "loss": 0.909, "step": 255000 }, { "epoch": 0.27, "learning_rate": 1.469021021021021e-05, "loss": 0.8941, "step": 255500 }, { "epoch": 0.27, "learning_rate": 1.4660240240240242e-05, "loss": 0.9059, "step": 256000 }, { "epoch": 0.28, "learning_rate": 1.463021021021021e-05, "loss": 0.9114, "step": 256500 }, { "epoch": 0.28, "learning_rate": 1.4600180180180181e-05, "loss": 0.8954, "step": 257000 }, { "epoch": 0.28, "learning_rate": 1.457015015015015e-05, "loss": 0.9018, "step": 257500 }, { "epoch": 0.28, "learning_rate": 1.454018018018018e-05, "loss": 0.9021, "step": 258000 }, { "epoch": 0.28, "learning_rate": 1.4510150150150151e-05, "loss": 0.8995, "step": 258500 }, { "epoch": 0.28, "learning_rate": 1.448012012012012e-05, "loss": 0.909, "step": 259000 }, { "epoch": 0.28, "learning_rate": 1.4450150150150152e-05, "loss": 0.8923, "step": 259500 }, { "epoch": 0.28, "learning_rate": 1.442012012012012e-05, "loss": 0.9074, "step": 260000 }, { "epoch": 0.28, "eval_loss": 0.8390381932258606, "eval_runtime": 573.4535, "eval_samples_per_second": 174.382, "eval_steps_per_second": 43.596, "step": 260000 }, { "epoch": 0.28, "learning_rate": 1.4390090090090091e-05, "loss": 0.9006, "step": 260500 }, { "epoch": 0.28, "learning_rate": 1.4360060060060061e-05, "loss": 0.9005, "step": 261000 }, { "epoch": 0.28, "learning_rate": 1.433003003003003e-05, "loss": 0.9141, "step": 261500 }, { "epoch": 0.28, "learning_rate": 1.43e-05, "loss": 0.9145, "step": 262000 }, { "epoch": 0.28, "learning_rate": 1.426996996996997e-05, "loss": 0.899, "step": 262500 }, { "epoch": 0.28, "learning_rate": 1.423993993993994e-05, "loss": 0.9111, "step": 263000 }, { "epoch": 0.28, "learning_rate": 1.420996996996997e-05, "loss": 0.906, "step": 263500 }, { "epoch": 0.28, "learning_rate": 1.417993993993994e-05, "loss": 0.8911, "step": 264000 }, { "epoch": 0.28, "learning_rate": 1.4149909909909912e-05, "loss": 0.8914, "step": 264500 }, { "epoch": 0.28, "learning_rate": 1.411987987987988e-05, "loss": 0.8978, "step": 265000 }, { "epoch": 0.28, "learning_rate": 1.4089849849849851e-05, "loss": 0.8921, "step": 265500 }, { "epoch": 0.29, "learning_rate": 1.4059819819819819e-05, "loss": 0.8938, "step": 266000 }, { "epoch": 0.29, "learning_rate": 1.402978978978979e-05, "loss": 0.9009, "step": 266500 }, { "epoch": 0.29, "learning_rate": 1.3999759759759759e-05, "loss": 0.8957, "step": 267000 }, { "epoch": 0.29, "learning_rate": 1.396984984984985e-05, "loss": 0.8944, "step": 267500 }, { "epoch": 0.29, "learning_rate": 1.393981981981982e-05, "loss": 0.8903, "step": 268000 }, { "epoch": 0.29, "learning_rate": 1.390978978978979e-05, "loss": 0.8932, "step": 268500 }, { "epoch": 0.29, "learning_rate": 1.3879759759759761e-05, "loss": 0.8853, "step": 269000 }, { "epoch": 0.29, "learning_rate": 1.384972972972973e-05, "loss": 0.8908, "step": 269500 }, { "epoch": 0.29, "learning_rate": 1.38196996996997e-05, "loss": 0.8888, "step": 270000 }, { "epoch": 0.29, "eval_loss": 0.8343602418899536, "eval_runtime": 589.1233, "eval_samples_per_second": 169.744, "eval_steps_per_second": 42.436, "step": 270000 }, { "epoch": 0.29, "learning_rate": 1.3789669669669669e-05, "loss": 0.8947, "step": 270500 }, { "epoch": 0.29, "learning_rate": 1.375963963963964e-05, "loss": 0.8939, "step": 271000 }, { "epoch": 0.29, "learning_rate": 1.3729609609609612e-05, "loss": 0.9018, "step": 271500 }, { "epoch": 0.29, "learning_rate": 1.369957957957958e-05, "loss": 0.9036, "step": 272000 }, { "epoch": 0.29, "learning_rate": 1.3669549549549551e-05, "loss": 0.8973, "step": 272500 }, { "epoch": 0.29, "learning_rate": 1.3639519519519519e-05, "loss": 0.885, "step": 273000 }, { "epoch": 0.29, "learning_rate": 1.360954954954955e-05, "loss": 0.8845, "step": 273500 }, { "epoch": 0.29, "learning_rate": 1.357951951951952e-05, "loss": 0.8789, "step": 274000 }, { "epoch": 0.29, "learning_rate": 1.354948948948949e-05, "loss": 0.8986, "step": 274500 }, { "epoch": 0.29, "learning_rate": 1.351945945945946e-05, "loss": 0.8909, "step": 275000 }, { "epoch": 0.3, "learning_rate": 1.348942942942943e-05, "loss": 0.9002, "step": 275500 }, { "epoch": 0.3, "learning_rate": 1.3459459459459461e-05, "loss": 0.8946, "step": 276000 }, { "epoch": 0.3, "learning_rate": 1.342942942942943e-05, "loss": 0.8911, "step": 276500 }, { "epoch": 0.3, "learning_rate": 1.33993993993994e-05, "loss": 0.891, "step": 277000 }, { "epoch": 0.3, "learning_rate": 1.3369369369369369e-05, "loss": 0.8833, "step": 277500 }, { "epoch": 0.3, "learning_rate": 1.33393993993994e-05, "loss": 0.8863, "step": 278000 }, { "epoch": 0.3, "learning_rate": 1.330936936936937e-05, "loss": 0.8765, "step": 278500 }, { "epoch": 0.3, "learning_rate": 1.32793993993994e-05, "loss": 0.8945, "step": 279000 }, { "epoch": 0.3, "learning_rate": 1.324936936936937e-05, "loss": 0.8936, "step": 279500 }, { "epoch": 0.3, "learning_rate": 1.321933933933934e-05, "loss": 0.9012, "step": 280000 }, { "epoch": 0.3, "eval_loss": 0.8308248519897461, "eval_runtime": 569.4338, "eval_samples_per_second": 175.613, "eval_steps_per_second": 43.903, "step": 280000 }, { "epoch": 0.3, "learning_rate": 1.3189309309309311e-05, "loss": 0.8835, "step": 280500 }, { "epoch": 0.3, "learning_rate": 1.3159279279279279e-05, "loss": 0.8866, "step": 281000 }, { "epoch": 0.3, "learning_rate": 1.312924924924925e-05, "loss": 0.8876, "step": 281500 }, { "epoch": 0.3, "learning_rate": 1.309927927927928e-05, "loss": 0.8846, "step": 282000 }, { "epoch": 0.3, "learning_rate": 1.306924924924925e-05, "loss": 0.8833, "step": 282500 }, { "epoch": 0.3, "learning_rate": 1.3039219219219219e-05, "loss": 0.8993, "step": 283000 }, { "epoch": 0.3, "learning_rate": 1.300918918918919e-05, "loss": 0.8802, "step": 283500 }, { "epoch": 0.3, "learning_rate": 1.297915915915916e-05, "loss": 0.8787, "step": 284000 }, { "epoch": 0.31, "learning_rate": 1.294912912912913e-05, "loss": 0.8809, "step": 284500 }, { "epoch": 0.31, "learning_rate": 1.291915915915916e-05, "loss": 0.8848, "step": 285000 }, { "epoch": 0.31, "learning_rate": 1.2889129129129129e-05, "loss": 0.8847, "step": 285500 }, { "epoch": 0.31, "learning_rate": 1.28590990990991e-05, "loss": 0.891, "step": 286000 }, { "epoch": 0.31, "learning_rate": 1.2829069069069068e-05, "loss": 0.8789, "step": 286500 }, { "epoch": 0.31, "learning_rate": 1.279903903903904e-05, "loss": 0.8962, "step": 287000 }, { "epoch": 0.31, "learning_rate": 1.276900900900901e-05, "loss": 0.8931, "step": 287500 }, { "epoch": 0.31, "learning_rate": 1.2738978978978979e-05, "loss": 0.8789, "step": 288000 }, { "epoch": 0.31, "learning_rate": 1.270900900900901e-05, "loss": 0.8914, "step": 288500 }, { "epoch": 0.31, "learning_rate": 1.267897897897898e-05, "loss": 0.8902, "step": 289000 }, { "epoch": 0.31, "learning_rate": 1.2648948948948949e-05, "loss": 0.8773, "step": 289500 }, { "epoch": 0.31, "learning_rate": 1.2618918918918919e-05, "loss": 0.8996, "step": 290000 }, { "epoch": 0.31, "eval_loss": 0.8253816366195679, "eval_runtime": 603.3986, "eval_samples_per_second": 165.728, "eval_steps_per_second": 41.432, "step": 290000 }, { "epoch": 0.31, "learning_rate": 1.2588888888888888e-05, "loss": 0.8824, "step": 290500 }, { "epoch": 0.31, "learning_rate": 1.255885885885886e-05, "loss": 0.8916, "step": 291000 }, { "epoch": 0.31, "learning_rate": 1.252882882882883e-05, "loss": 0.887, "step": 291500 }, { "epoch": 0.31, "learning_rate": 1.24987987987988e-05, "loss": 0.8808, "step": 292000 }, { "epoch": 0.31, "learning_rate": 1.2468828828828828e-05, "loss": 0.8912, "step": 292500 }, { "epoch": 0.31, "learning_rate": 1.24387987987988e-05, "loss": 0.8746, "step": 293000 }, { "epoch": 0.31, "learning_rate": 1.2408768768768768e-05, "loss": 0.8738, "step": 293500 }, { "epoch": 0.32, "learning_rate": 1.237873873873874e-05, "loss": 0.8837, "step": 294000 }, { "epoch": 0.32, "learning_rate": 1.2348768768768768e-05, "loss": 0.8826, "step": 294500 }, { "epoch": 0.32, "learning_rate": 1.231873873873874e-05, "loss": 0.875, "step": 295000 }, { "epoch": 0.32, "learning_rate": 1.228870870870871e-05, "loss": 0.8842, "step": 295500 }, { "epoch": 0.32, "learning_rate": 1.225867867867868e-05, "loss": 0.8778, "step": 296000 }, { "epoch": 0.32, "learning_rate": 1.222870870870871e-05, "loss": 0.8771, "step": 296500 }, { "epoch": 0.32, "learning_rate": 1.2198678678678678e-05, "loss": 0.8934, "step": 297000 }, { "epoch": 0.32, "learning_rate": 1.216864864864865e-05, "loss": 0.881, "step": 297500 }, { "epoch": 0.32, "learning_rate": 1.2138618618618617e-05, "loss": 0.8993, "step": 298000 }, { "epoch": 0.32, "learning_rate": 1.2108648648648648e-05, "loss": 0.8738, "step": 298500 }, { "epoch": 0.32, "learning_rate": 1.2078618618618618e-05, "loss": 0.8851, "step": 299000 }, { "epoch": 0.32, "learning_rate": 1.204858858858859e-05, "loss": 0.8802, "step": 299500 }, { "epoch": 0.32, "learning_rate": 1.201855855855856e-05, "loss": 0.8783, "step": 300000 }, { "epoch": 0.32, "eval_loss": 0.8227924108505249, "eval_runtime": 588.0109, "eval_samples_per_second": 170.065, "eval_steps_per_second": 42.516, "step": 300000 }, { "epoch": 0.32, "learning_rate": 1.198864864864865e-05, "loss": 0.8749, "step": 300500 }, { "epoch": 0.32, "learning_rate": 1.195861861861862e-05, "loss": 0.8893, "step": 301000 }, { "epoch": 0.32, "learning_rate": 1.1928588588588589e-05, "loss": 0.8715, "step": 301500 }, { "epoch": 0.32, "learning_rate": 1.1898558558558559e-05, "loss": 0.8894, "step": 302000 }, { "epoch": 0.32, "learning_rate": 1.1868588588588588e-05, "loss": 0.875, "step": 302500 }, { "epoch": 0.32, "learning_rate": 1.183855855855856e-05, "loss": 0.8829, "step": 303000 }, { "epoch": 0.33, "learning_rate": 1.1808528528528529e-05, "loss": 0.8872, "step": 303500 }, { "epoch": 0.33, "learning_rate": 1.1778498498498499e-05, "loss": 0.8888, "step": 304000 }, { "epoch": 0.33, "learning_rate": 1.174846846846847e-05, "loss": 0.8716, "step": 304500 }, { "epoch": 0.33, "learning_rate": 1.1718438438438438e-05, "loss": 0.8725, "step": 305000 }, { "epoch": 0.33, "learning_rate": 1.168840840840841e-05, "loss": 0.8749, "step": 305500 }, { "epoch": 0.33, "learning_rate": 1.1658378378378377e-05, "loss": 0.8823, "step": 306000 }, { "epoch": 0.33, "learning_rate": 1.1628348348348349e-05, "loss": 0.8788, "step": 306500 }, { "epoch": 0.33, "learning_rate": 1.1598378378378378e-05, "loss": 0.8827, "step": 307000 }, { "epoch": 0.33, "learning_rate": 1.156834834834835e-05, "loss": 0.8728, "step": 307500 }, { "epoch": 0.33, "learning_rate": 1.1538318318318319e-05, "loss": 0.8875, "step": 308000 }, { "epoch": 0.33, "learning_rate": 1.1508288288288289e-05, "loss": 0.8812, "step": 308500 }, { "epoch": 0.33, "learning_rate": 1.1478378378378377e-05, "loss": 0.8671, "step": 309000 }, { "epoch": 0.33, "learning_rate": 1.1448348348348349e-05, "loss": 0.8723, "step": 309500 }, { "epoch": 0.33, "learning_rate": 1.1418318318318319e-05, "loss": 0.8796, "step": 310000 }, { "epoch": 0.33, "eval_loss": 0.8185199499130249, "eval_runtime": 589.4368, "eval_samples_per_second": 169.653, "eval_steps_per_second": 42.413, "step": 310000 }, { "epoch": 0.33, "learning_rate": 1.1388288288288288e-05, "loss": 0.8775, "step": 310500 }, { "epoch": 0.33, "learning_rate": 1.1358318318318319e-05, "loss": 0.875, "step": 311000 }, { "epoch": 0.33, "learning_rate": 1.1328288288288289e-05, "loss": 0.8852, "step": 311500 }, { "epoch": 0.33, "learning_rate": 1.1298258258258259e-05, "loss": 0.877, "step": 312000 }, { "epoch": 0.34, "learning_rate": 1.1268228228228228e-05, "loss": 0.8655, "step": 312500 }, { "epoch": 0.34, "learning_rate": 1.1238198198198198e-05, "loss": 0.8779, "step": 313000 }, { "epoch": 0.34, "learning_rate": 1.1208228228228227e-05, "loss": 0.8751, "step": 313500 }, { "epoch": 0.34, "learning_rate": 1.1178198198198199e-05, "loss": 0.8678, "step": 314000 }, { "epoch": 0.34, "learning_rate": 1.114816816816817e-05, "loss": 0.8676, "step": 314500 }, { "epoch": 0.34, "learning_rate": 1.1118138138138138e-05, "loss": 0.8695, "step": 315000 }, { "epoch": 0.34, "learning_rate": 1.108810810810811e-05, "loss": 0.8735, "step": 315500 }, { "epoch": 0.34, "learning_rate": 1.1058078078078077e-05, "loss": 0.8809, "step": 316000 }, { "epoch": 0.34, "learning_rate": 1.1028048048048049e-05, "loss": 0.8762, "step": 316500 }, { "epoch": 0.34, "learning_rate": 1.0998018018018018e-05, "loss": 0.871, "step": 317000 }, { "epoch": 0.34, "learning_rate": 1.0968108108108109e-05, "loss": 0.8726, "step": 317500 }, { "epoch": 0.34, "learning_rate": 1.0938078078078077e-05, "loss": 0.8649, "step": 318000 }, { "epoch": 0.34, "learning_rate": 1.0908048048048048e-05, "loss": 0.8849, "step": 318500 }, { "epoch": 0.34, "learning_rate": 1.087801801801802e-05, "loss": 0.8812, "step": 319000 }, { "epoch": 0.34, "learning_rate": 1.0848048048048049e-05, "loss": 0.862, "step": 319500 }, { "epoch": 0.34, "learning_rate": 1.0818018018018018e-05, "loss": 0.8739, "step": 320000 }, { "epoch": 0.34, "eval_loss": 0.8156814575195312, "eval_runtime": 622.3771, "eval_samples_per_second": 160.674, "eval_steps_per_second": 40.169, "step": 320000 }, { "epoch": 0.34, "learning_rate": 1.0787987987987988e-05, "loss": 0.8759, "step": 320500 }, { "epoch": 0.34, "learning_rate": 1.0757957957957958e-05, "loss": 0.8817, "step": 321000 }, { "epoch": 0.34, "learning_rate": 1.0727927927927928e-05, "loss": 0.8834, "step": 321500 }, { "epoch": 0.35, "learning_rate": 1.0697897897897897e-05, "loss": 0.8693, "step": 322000 }, { "epoch": 0.35, "learning_rate": 1.0667867867867869e-05, "loss": 0.8618, "step": 322500 }, { "epoch": 0.35, "learning_rate": 1.0637837837837838e-05, "loss": 0.8678, "step": 323000 }, { "epoch": 0.35, "learning_rate": 1.0607807807807808e-05, "loss": 0.8805, "step": 323500 }, { "epoch": 0.35, "learning_rate": 1.0577777777777778e-05, "loss": 0.8654, "step": 324000 }, { "epoch": 0.35, "learning_rate": 1.0547747747747747e-05, "loss": 0.8551, "step": 324500 }, { "epoch": 0.35, "learning_rate": 1.0517717717717719e-05, "loss": 0.8831, "step": 325000 }, { "epoch": 0.35, "learning_rate": 1.0487747747747748e-05, "loss": 0.8771, "step": 325500 }, { "epoch": 0.35, "learning_rate": 1.0457717717717718e-05, "loss": 0.8717, "step": 326000 }, { "epoch": 0.35, "learning_rate": 1.0427747747747749e-05, "loss": 0.8588, "step": 326500 }, { "epoch": 0.35, "learning_rate": 1.0397717717717718e-05, "loss": 0.882, "step": 327000 }, { "epoch": 0.35, "learning_rate": 1.0367687687687688e-05, "loss": 0.8688, "step": 327500 }, { "epoch": 0.35, "learning_rate": 1.0337657657657658e-05, "loss": 0.8675, "step": 328000 }, { "epoch": 0.35, "learning_rate": 1.0307627627627627e-05, "loss": 0.8583, "step": 328500 }, { "epoch": 0.35, "learning_rate": 1.0277597597597597e-05, "loss": 0.8745, "step": 329000 }, { "epoch": 0.35, "learning_rate": 1.0247567567567569e-05, "loss": 0.8631, "step": 329500 }, { "epoch": 0.35, "learning_rate": 1.0217537537537537e-05, "loss": 0.8637, "step": 330000 }, { "epoch": 0.35, "eval_loss": 0.8101323246955872, "eval_runtime": 593.644, "eval_samples_per_second": 168.451, "eval_steps_per_second": 42.113, "step": 330000 }, { "epoch": 0.35, "learning_rate": 1.0187567567567569e-05, "loss": 0.8626, "step": 330500 }, { "epoch": 0.35, "learning_rate": 1.0157597597597598e-05, "loss": 0.8651, "step": 331000 }, { "epoch": 0.36, "learning_rate": 1.012762762762763e-05, "loss": 0.8706, "step": 331500 }, { "epoch": 0.36, "learning_rate": 1.0097597597597597e-05, "loss": 0.8766, "step": 332000 }, { "epoch": 0.36, "learning_rate": 1.0067567567567569e-05, "loss": 0.8753, "step": 332500 }, { "epoch": 0.36, "learning_rate": 1.0037537537537537e-05, "loss": 0.8735, "step": 333000 }, { "epoch": 0.36, "learning_rate": 1.0007567567567567e-05, "loss": 0.8747, "step": 333500 }, { "epoch": 0.36, "learning_rate": 9.977537537537537e-06, "loss": 0.8596, "step": 334000 }, { "epoch": 0.36, "learning_rate": 9.947507507507509e-06, "loss": 0.868, "step": 334500 }, { "epoch": 0.36, "learning_rate": 9.917477477477478e-06, "loss": 0.8683, "step": 335000 }, { "epoch": 0.36, "learning_rate": 9.887447447447448e-06, "loss": 0.8763, "step": 335500 }, { "epoch": 0.36, "learning_rate": 9.857417417417418e-06, "loss": 0.8638, "step": 336000 }, { "epoch": 0.36, "learning_rate": 9.827387387387387e-06, "loss": 0.8677, "step": 336500 }, { "epoch": 0.36, "learning_rate": 9.797357357357357e-06, "loss": 0.8753, "step": 337000 }, { "epoch": 0.36, "learning_rate": 9.767327327327328e-06, "loss": 0.8708, "step": 337500 }, { "epoch": 0.36, "learning_rate": 9.737297297297298e-06, "loss": 0.8722, "step": 338000 }, { "epoch": 0.36, "learning_rate": 9.707327327327329e-06, "loss": 0.877, "step": 338500 }, { "epoch": 0.36, "learning_rate": 9.677357357357358e-06, "loss": 0.8641, "step": 339000 }, { "epoch": 0.36, "learning_rate": 9.647327327327328e-06, "loss": 0.8656, "step": 339500 }, { "epoch": 0.36, "learning_rate": 9.617297297297298e-06, "loss": 0.87, "step": 340000 }, { "epoch": 0.36, "eval_loss": 0.8081399202346802, "eval_runtime": 594.2224, "eval_samples_per_second": 168.287, "eval_steps_per_second": 42.072, "step": 340000 }, { "epoch": 0.37, "learning_rate": 9.587267267267267e-06, "loss": 0.8514, "step": 340500 }, { "epoch": 0.37, "learning_rate": 9.557237237237237e-06, "loss": 0.8726, "step": 341000 }, { "epoch": 0.37, "learning_rate": 9.527207207207207e-06, "loss": 0.8674, "step": 341500 }, { "epoch": 0.37, "learning_rate": 9.497177177177178e-06, "loss": 0.8656, "step": 342000 }, { "epoch": 0.37, "learning_rate": 9.467147147147148e-06, "loss": 0.8688, "step": 342500 }, { "epoch": 0.37, "learning_rate": 9.437177177177179e-06, "loss": 0.8619, "step": 343000 }, { "epoch": 0.37, "learning_rate": 9.407147147147147e-06, "loss": 0.8576, "step": 343500 }, { "epoch": 0.37, "learning_rate": 9.377117117117118e-06, "loss": 0.861, "step": 344000 }, { "epoch": 0.37, "learning_rate": 9.347087087087086e-06, "loss": 0.8655, "step": 344500 }, { "epoch": 0.37, "learning_rate": 9.317057057057058e-06, "loss": 0.8534, "step": 345000 }, { "epoch": 0.37, "learning_rate": 9.287087087087087e-06, "loss": 0.8768, "step": 345500 }, { "epoch": 0.37, "learning_rate": 9.257057057057058e-06, "loss": 0.8584, "step": 346000 }, { "epoch": 0.37, "learning_rate": 9.227027027027028e-06, "loss": 0.8716, "step": 346500 }, { "epoch": 0.37, "learning_rate": 9.196996996996997e-06, "loss": 0.8568, "step": 347000 }, { "epoch": 0.37, "learning_rate": 9.166966966966967e-06, "loss": 0.8503, "step": 347500 }, { "epoch": 0.37, "learning_rate": 9.136936936936937e-06, "loss": 0.8573, "step": 348000 }, { "epoch": 0.37, "learning_rate": 9.106906906906907e-06, "loss": 0.8619, "step": 348500 }, { "epoch": 0.37, "learning_rate": 9.076936936936936e-06, "loss": 0.8588, "step": 349000 }, { "epoch": 0.37, "learning_rate": 9.046906906906907e-06, "loss": 0.8632, "step": 349500 }, { "epoch": 0.38, "learning_rate": 9.016876876876879e-06, "loss": 0.8517, "step": 350000 }, { "epoch": 0.38, "eval_loss": 0.8035141229629517, "eval_runtime": 567.0359, "eval_samples_per_second": 176.356, "eval_steps_per_second": 44.089, "step": 350000 }, { "epoch": 0.38, "learning_rate": 8.986846846846847e-06, "loss": 0.8759, "step": 350500 }, { "epoch": 0.38, "learning_rate": 8.956816816816818e-06, "loss": 0.8636, "step": 351000 }, { "epoch": 0.38, "learning_rate": 8.926906906906907e-06, "loss": 0.8523, "step": 351500 }, { "epoch": 0.38, "learning_rate": 8.896876876876878e-06, "loss": 0.8508, "step": 352000 }, { "epoch": 0.38, "learning_rate": 8.866906906906907e-06, "loss": 0.86, "step": 352500 }, { "epoch": 0.38, "learning_rate": 8.836876876876877e-06, "loss": 0.8693, "step": 353000 }, { "epoch": 0.38, "learning_rate": 8.806846846846847e-06, "loss": 0.8597, "step": 353500 }, { "epoch": 0.38, "learning_rate": 8.776816816816818e-06, "loss": 0.8509, "step": 354000 }, { "epoch": 0.38, "learning_rate": 8.746786786786786e-06, "loss": 0.8521, "step": 354500 }, { "epoch": 0.38, "learning_rate": 8.716756756756757e-06, "loss": 0.847, "step": 355000 }, { "epoch": 0.38, "learning_rate": 8.686726726726727e-06, "loss": 0.8739, "step": 355500 }, { "epoch": 0.38, "learning_rate": 8.656696696696697e-06, "loss": 0.8625, "step": 356000 }, { "epoch": 0.38, "learning_rate": 8.626666666666667e-06, "loss": 0.866, "step": 356500 }, { "epoch": 0.38, "learning_rate": 8.596636636636636e-06, "loss": 0.8592, "step": 357000 }, { "epoch": 0.38, "learning_rate": 8.566606606606608e-06, "loss": 0.8673, "step": 357500 }, { "epoch": 0.38, "learning_rate": 8.536576576576577e-06, "loss": 0.8658, "step": 358000 }, { "epoch": 0.38, "learning_rate": 8.506546546546547e-06, "loss": 0.8646, "step": 358500 }, { "epoch": 0.38, "learning_rate": 8.476576576576578e-06, "loss": 0.8572, "step": 359000 }, { "epoch": 0.39, "learning_rate": 8.446546546546546e-06, "loss": 0.8656, "step": 359500 }, { "epoch": 0.39, "learning_rate": 8.416516516516517e-06, "loss": 0.8523, "step": 360000 }, { "epoch": 0.39, "eval_loss": 0.802210807800293, "eval_runtime": 630.0985, "eval_samples_per_second": 158.705, "eval_steps_per_second": 39.676, "step": 360000 }, { "epoch": 0.39, "learning_rate": 8.386486486486485e-06, "loss": 0.8687, "step": 360500 }, { "epoch": 0.39, "learning_rate": 8.356456456456457e-06, "loss": 0.8481, "step": 361000 }, { "epoch": 0.39, "learning_rate": 8.326426426426428e-06, "loss": 0.87, "step": 361500 }, { "epoch": 0.39, "learning_rate": 8.296456456456457e-06, "loss": 0.8499, "step": 362000 }, { "epoch": 0.39, "learning_rate": 8.266486486486488e-06, "loss": 0.8482, "step": 362500 }, { "epoch": 0.39, "learning_rate": 8.236456456456456e-06, "loss": 0.8452, "step": 363000 }, { "epoch": 0.39, "learning_rate": 8.206426426426428e-06, "loss": 0.8575, "step": 363500 }, { "epoch": 0.39, "learning_rate": 8.176456456456457e-06, "loss": 0.8567, "step": 364000 }, { "epoch": 0.39, "learning_rate": 8.146426426426426e-06, "loss": 0.8449, "step": 364500 }, { "epoch": 0.39, "learning_rate": 8.116396396396396e-06, "loss": 0.8634, "step": 365000 }, { "epoch": 0.39, "learning_rate": 8.086366366366368e-06, "loss": 0.8552, "step": 365500 }, { "epoch": 0.39, "learning_rate": 8.056336336336337e-06, "loss": 0.8593, "step": 366000 }, { "epoch": 0.39, "learning_rate": 8.026306306306307e-06, "loss": 0.846, "step": 366500 }, { "epoch": 0.39, "learning_rate": 7.996276276276277e-06, "loss": 0.8575, "step": 367000 }, { "epoch": 0.39, "learning_rate": 7.966246246246246e-06, "loss": 0.8659, "step": 367500 }, { "epoch": 0.39, "learning_rate": 7.936216216216216e-06, "loss": 0.8543, "step": 368000 }, { "epoch": 0.4, "learning_rate": 7.906186186186186e-06, "loss": 0.8501, "step": 368500 }, { "epoch": 0.4, "learning_rate": 7.876156156156155e-06, "loss": 0.8605, "step": 369000 }, { "epoch": 0.4, "learning_rate": 7.846126126126127e-06, "loss": 0.8541, "step": 369500 }, { "epoch": 0.4, "learning_rate": 7.816096096096097e-06, "loss": 0.85, "step": 370000 }, { "epoch": 0.4, "eval_loss": 0.798128604888916, "eval_runtime": 622.5637, "eval_samples_per_second": 160.626, "eval_steps_per_second": 40.157, "step": 370000 }, { "epoch": 0.4, "learning_rate": 7.786126126126127e-06, "loss": 0.8495, "step": 370500 }, { "epoch": 0.4, "learning_rate": 7.756096096096095e-06, "loss": 0.8448, "step": 371000 }, { "epoch": 0.4, "learning_rate": 7.726066066066067e-06, "loss": 0.8508, "step": 371500 }, { "epoch": 0.4, "learning_rate": 7.696036036036035e-06, "loss": 0.8564, "step": 372000 }, { "epoch": 0.4, "learning_rate": 7.666006006006006e-06, "loss": 0.8434, "step": 372500 }, { "epoch": 0.4, "learning_rate": 7.636036036036037e-06, "loss": 0.853, "step": 373000 }, { "epoch": 0.4, "learning_rate": 7.606006006006006e-06, "loss": 0.8536, "step": 373500 }, { "epoch": 0.4, "learning_rate": 7.5759759759759765e-06, "loss": 0.8416, "step": 374000 }, { "epoch": 0.4, "learning_rate": 7.545945945945945e-06, "loss": 0.8509, "step": 374500 }, { "epoch": 0.4, "learning_rate": 7.515915915915916e-06, "loss": 0.852, "step": 375000 }, { "epoch": 0.4, "learning_rate": 7.4858858858858865e-06, "loss": 0.8481, "step": 375500 }, { "epoch": 0.4, "learning_rate": 7.455855855855856e-06, "loss": 0.8505, "step": 376000 }, { "epoch": 0.4, "learning_rate": 7.425825825825826e-06, "loss": 0.8425, "step": 376500 }, { "epoch": 0.4, "learning_rate": 7.395855855855856e-06, "loss": 0.8514, "step": 377000 }, { "epoch": 0.4, "learning_rate": 7.365825825825826e-06, "loss": 0.8478, "step": 377500 }, { "epoch": 0.41, "learning_rate": 7.335795795795796e-06, "loss": 0.8468, "step": 378000 }, { "epoch": 0.41, "learning_rate": 7.305765765765766e-06, "loss": 0.8621, "step": 378500 }, { "epoch": 0.41, "learning_rate": 7.275795795795796e-06, "loss": 0.8486, "step": 379000 }, { "epoch": 0.41, "learning_rate": 7.245825825825827e-06, "loss": 0.8491, "step": 379500 }, { "epoch": 0.41, "learning_rate": 7.2157957957957965e-06, "loss": 0.8329, "step": 380000 }, { "epoch": 0.41, "eval_loss": 0.795859694480896, "eval_runtime": 607.5667, "eval_samples_per_second": 164.591, "eval_steps_per_second": 41.148, "step": 380000 }, { "epoch": 0.41, "learning_rate": 7.185765765765766e-06, "loss": 0.852, "step": 380500 }, { "epoch": 0.41, "learning_rate": 7.155735735735736e-06, "loss": 0.8493, "step": 381000 }, { "epoch": 0.41, "learning_rate": 7.125765765765766e-06, "loss": 0.8506, "step": 381500 }, { "epoch": 0.41, "learning_rate": 7.095735735735736e-06, "loss": 0.8537, "step": 382000 }, { "epoch": 0.41, "learning_rate": 7.065765765765766e-06, "loss": 0.8387, "step": 382500 }, { "epoch": 0.41, "learning_rate": 7.035735735735736e-06, "loss": 0.844, "step": 383000 }, { "epoch": 0.41, "learning_rate": 7.005705705705706e-06, "loss": 0.8467, "step": 383500 }, { "epoch": 0.41, "learning_rate": 6.975675675675676e-06, "loss": 0.8499, "step": 384000 }, { "epoch": 0.41, "learning_rate": 6.945645645645646e-06, "loss": 0.8463, "step": 384500 }, { "epoch": 0.41, "learning_rate": 6.915615615615616e-06, "loss": 0.8336, "step": 385000 }, { "epoch": 0.41, "learning_rate": 6.8855855855855855e-06, "loss": 0.8423, "step": 385500 }, { "epoch": 0.41, "learning_rate": 6.855555555555555e-06, "loss": 0.8323, "step": 386000 }, { "epoch": 0.41, "learning_rate": 6.825525525525526e-06, "loss": 0.8372, "step": 386500 }, { "epoch": 0.41, "learning_rate": 6.795495495495496e-06, "loss": 0.8564, "step": 387000 }, { "epoch": 0.42, "learning_rate": 6.765465465465466e-06, "loss": 0.8527, "step": 387500 }, { "epoch": 0.42, "learning_rate": 6.735435435435436e-06, "loss": 0.8376, "step": 388000 }, { "epoch": 0.42, "learning_rate": 6.7054054054054054e-06, "loss": 0.835, "step": 388500 }, { "epoch": 0.42, "learning_rate": 6.675375375375375e-06, "loss": 0.8449, "step": 389000 }, { "epoch": 0.42, "learning_rate": 6.645345345345346e-06, "loss": 0.8472, "step": 389500 }, { "epoch": 0.42, "learning_rate": 6.615315315315315e-06, "loss": 0.8473, "step": 390000 }, { "epoch": 0.42, "eval_loss": 0.7919498682022095, "eval_runtime": 570.4158, "eval_samples_per_second": 175.311, "eval_steps_per_second": 43.828, "step": 390000 }, { "epoch": 0.42, "learning_rate": 6.585285285285285e-06, "loss": 0.8466, "step": 390500 }, { "epoch": 0.42, "learning_rate": 6.555315315315316e-06, "loss": 0.8593, "step": 391000 }, { "epoch": 0.42, "learning_rate": 6.525285285285286e-06, "loss": 0.8513, "step": 391500 }, { "epoch": 0.42, "learning_rate": 6.495255255255255e-06, "loss": 0.8398, "step": 392000 }, { "epoch": 0.42, "learning_rate": 6.465225225225225e-06, "loss": 0.8447, "step": 392500 }, { "epoch": 0.42, "learning_rate": 6.4353153153153154e-06, "loss": 0.8523, "step": 393000 }, { "epoch": 0.42, "learning_rate": 6.405285285285285e-06, "loss": 0.8435, "step": 393500 }, { "epoch": 0.42, "learning_rate": 6.375255255255255e-06, "loss": 0.8606, "step": 394000 }, { "epoch": 0.42, "learning_rate": 6.345225225225225e-06, "loss": 0.8538, "step": 394500 }, { "epoch": 0.42, "learning_rate": 6.315195195195196e-06, "loss": 0.8616, "step": 395000 }, { "epoch": 0.42, "learning_rate": 6.285165165165166e-06, "loss": 0.8418, "step": 395500 }, { "epoch": 0.42, "learning_rate": 6.255135135135135e-06, "loss": 0.8449, "step": 396000 }, { "epoch": 0.43, "learning_rate": 6.225105105105105e-06, "loss": 0.8382, "step": 396500 }, { "epoch": 0.43, "learning_rate": 6.195135135135135e-06, "loss": 0.8516, "step": 397000 }, { "epoch": 0.43, "learning_rate": 6.165105105105105e-06, "loss": 0.8472, "step": 397500 }, { "epoch": 0.43, "learning_rate": 6.135135135135135e-06, "loss": 0.8354, "step": 398000 }, { "epoch": 0.43, "learning_rate": 6.1051051051051045e-06, "loss": 0.856, "step": 398500 }, { "epoch": 0.43, "learning_rate": 6.075075075075076e-06, "loss": 0.8292, "step": 399000 }, { "epoch": 0.43, "learning_rate": 6.045045045045046e-06, "loss": 0.8403, "step": 399500 }, { "epoch": 0.43, "learning_rate": 6.015015015015015e-06, "loss": 0.8495, "step": 400000 }, { "epoch": 0.43, "eval_loss": 0.7893297672271729, "eval_runtime": 586.3311, "eval_samples_per_second": 170.552, "eval_steps_per_second": 42.638, "step": 400000 }, { "epoch": 0.43, "learning_rate": 5.985045045045045e-06, "loss": 0.8557, "step": 400500 }, { "epoch": 0.43, "learning_rate": 5.955015015015015e-06, "loss": 0.8496, "step": 401000 }, { "epoch": 0.43, "learning_rate": 5.925045045045045e-06, "loss": 0.8406, "step": 401500 }, { "epoch": 0.43, "learning_rate": 5.895015015015015e-06, "loss": 0.8466, "step": 402000 }, { "epoch": 0.43, "learning_rate": 5.8649849849849845e-06, "loss": 0.8401, "step": 402500 }, { "epoch": 0.43, "learning_rate": 5.834954954954956e-06, "loss": 0.8327, "step": 403000 }, { "epoch": 0.43, "learning_rate": 5.8049249249249256e-06, "loss": 0.8292, "step": 403500 }, { "epoch": 0.43, "learning_rate": 5.774894894894895e-06, "loss": 0.8479, "step": 404000 }, { "epoch": 0.43, "learning_rate": 5.744864864864865e-06, "loss": 0.8425, "step": 404500 }, { "epoch": 0.43, "learning_rate": 5.714834834834835e-06, "loss": 0.8512, "step": 405000 }, { "epoch": 0.43, "learning_rate": 5.684804804804804e-06, "loss": 0.8414, "step": 405500 }, { "epoch": 0.44, "learning_rate": 5.654774774774775e-06, "loss": 0.8307, "step": 406000 }, { "epoch": 0.44, "learning_rate": 5.624744744744745e-06, "loss": 0.829, "step": 406500 }, { "epoch": 0.44, "learning_rate": 5.5947747747747755e-06, "loss": 0.8441, "step": 407000 }, { "epoch": 0.44, "learning_rate": 5.564744744744745e-06, "loss": 0.8467, "step": 407500 }, { "epoch": 0.44, "learning_rate": 5.534714714714715e-06, "loss": 0.8505, "step": 408000 }, { "epoch": 0.44, "learning_rate": 5.504684684684685e-06, "loss": 0.8398, "step": 408500 }, { "epoch": 0.44, "learning_rate": 5.474714714714715e-06, "loss": 0.8352, "step": 409000 }, { "epoch": 0.44, "learning_rate": 5.444684684684684e-06, "loss": 0.8452, "step": 409500 }, { "epoch": 0.44, "learning_rate": 5.414654654654655e-06, "loss": 0.8468, "step": 410000 }, { "epoch": 0.44, "eval_loss": 0.7882465124130249, "eval_runtime": 564.1275, "eval_samples_per_second": 177.265, "eval_steps_per_second": 44.316, "step": 410000 }, { "epoch": 0.44, "learning_rate": 5.384684684684685e-06, "loss": 0.8428, "step": 410500 }, { "epoch": 0.44, "learning_rate": 5.3546546546546555e-06, "loss": 0.8524, "step": 411000 }, { "epoch": 0.44, "learning_rate": 5.324624624624625e-06, "loss": 0.8399, "step": 411500 }, { "epoch": 0.44, "learning_rate": 5.294594594594595e-06, "loss": 0.8295, "step": 412000 }, { "epoch": 0.44, "learning_rate": 5.2645645645645646e-06, "loss": 0.8332, "step": 412500 }, { "epoch": 0.44, "learning_rate": 5.234534534534534e-06, "loss": 0.8444, "step": 413000 }, { "epoch": 0.44, "learning_rate": 5.204504504504505e-06, "loss": 0.8372, "step": 413500 }, { "epoch": 0.44, "learning_rate": 5.1744744744744745e-06, "loss": 0.8402, "step": 414000 }, { "epoch": 0.44, "learning_rate": 5.144444444444445e-06, "loss": 0.8521, "step": 414500 }, { "epoch": 0.45, "learning_rate": 5.114414414414415e-06, "loss": 0.8418, "step": 415000 }, { "epoch": 0.45, "learning_rate": 5.0843843843843845e-06, "loss": 0.8454, "step": 415500 }, { "epoch": 0.45, "learning_rate": 5.0544144144144145e-06, "loss": 0.8176, "step": 416000 }, { "epoch": 0.45, "learning_rate": 5.024384384384384e-06, "loss": 0.8556, "step": 416500 }, { "epoch": 0.45, "learning_rate": 4.994354354354355e-06, "loss": 0.8245, "step": 417000 }, { "epoch": 0.45, "learning_rate": 4.9643243243243245e-06, "loss": 0.8418, "step": 417500 }, { "epoch": 0.45, "learning_rate": 4.934294294294294e-06, "loss": 0.8351, "step": 418000 }, { "epoch": 0.45, "learning_rate": 4.904264264264265e-06, "loss": 0.841, "step": 418500 }, { "epoch": 0.45, "learning_rate": 4.874234234234234e-06, "loss": 0.8335, "step": 419000 }, { "epoch": 0.45, "learning_rate": 4.844204204204204e-06, "loss": 0.8299, "step": 419500 }, { "epoch": 0.45, "learning_rate": 4.814234234234234e-06, "loss": 0.8355, "step": 420000 }, { "epoch": 0.45, "eval_loss": 0.7847426533699036, "eval_runtime": 565.6865, "eval_samples_per_second": 176.776, "eval_steps_per_second": 44.194, "step": 420000 }, { "epoch": 0.45, "learning_rate": 4.784204204204205e-06, "loss": 0.8216, "step": 420500 }, { "epoch": 0.45, "learning_rate": 4.754174174174174e-06, "loss": 0.8486, "step": 421000 }, { "epoch": 0.45, "learning_rate": 4.724144144144144e-06, "loss": 0.8411, "step": 421500 }, { "epoch": 0.45, "learning_rate": 4.694114114114114e-06, "loss": 0.8447, "step": 422000 }, { "epoch": 0.45, "learning_rate": 4.664144144144145e-06, "loss": 0.8363, "step": 422500 }, { "epoch": 0.45, "learning_rate": 4.634114114114114e-06, "loss": 0.83, "step": 423000 }, { "epoch": 0.45, "learning_rate": 4.604084084084084e-06, "loss": 0.8398, "step": 423500 }, { "epoch": 0.45, "learning_rate": 4.574114114114114e-06, "loss": 0.8441, "step": 424000 }, { "epoch": 0.46, "learning_rate": 4.544084084084084e-06, "loss": 0.8373, "step": 424500 }, { "epoch": 0.46, "learning_rate": 4.514054054054054e-06, "loss": 0.8403, "step": 425000 }, { "epoch": 0.46, "learning_rate": 4.484024024024024e-06, "loss": 0.8391, "step": 425500 }, { "epoch": 0.46, "learning_rate": 4.453993993993994e-06, "loss": 0.832, "step": 426000 }, { "epoch": 0.46, "learning_rate": 4.423963963963964e-06, "loss": 0.8524, "step": 426500 }, { "epoch": 0.46, "learning_rate": 4.393933933933934e-06, "loss": 0.8396, "step": 427000 }, { "epoch": 0.46, "learning_rate": 4.3639039039039046e-06, "loss": 0.8363, "step": 427500 }, { "epoch": 0.46, "learning_rate": 4.333993993993994e-06, "loss": 0.834, "step": 428000 }, { "epoch": 0.46, "learning_rate": 4.303963963963964e-06, "loss": 0.8476, "step": 428500 }, { "epoch": 0.46, "learning_rate": 4.2739339339339335e-06, "loss": 0.8509, "step": 429000 }, { "epoch": 0.46, "learning_rate": 4.243903903903904e-06, "loss": 0.8368, "step": 429500 }, { "epoch": 0.46, "learning_rate": 4.213873873873874e-06, "loss": 0.8366, "step": 430000 }, { "epoch": 0.46, "eval_loss": 0.7822093963623047, "eval_runtime": 571.4633, "eval_samples_per_second": 174.989, "eval_steps_per_second": 43.747, "step": 430000 }, { "epoch": 0.46, "learning_rate": 4.183843843843844e-06, "loss": 0.8383, "step": 430500 }, { "epoch": 0.46, "learning_rate": 4.153813813813814e-06, "loss": 0.8404, "step": 431000 }, { "epoch": 0.46, "learning_rate": 4.123783783783784e-06, "loss": 0.8348, "step": 431500 }, { "epoch": 0.46, "learning_rate": 4.093813813813814e-06, "loss": 0.8332, "step": 432000 }, { "epoch": 0.46, "learning_rate": 4.063783783783783e-06, "loss": 0.8323, "step": 432500 }, { "epoch": 0.46, "learning_rate": 4.033753753753754e-06, "loss": 0.8299, "step": 433000 }, { "epoch": 0.46, "learning_rate": 4.003783783783784e-06, "loss": 0.831, "step": 433500 }, { "epoch": 0.47, "learning_rate": 3.973813813813814e-06, "loss": 0.837, "step": 434000 }, { "epoch": 0.47, "learning_rate": 3.9437837837837846e-06, "loss": 0.8455, "step": 434500 }, { "epoch": 0.47, "learning_rate": 3.913753753753754e-06, "loss": 0.8317, "step": 435000 }, { "epoch": 0.47, "learning_rate": 3.883723723723724e-06, "loss": 0.817, "step": 435500 }, { "epoch": 0.47, "learning_rate": 3.853693693693694e-06, "loss": 0.8346, "step": 436000 }, { "epoch": 0.47, "learning_rate": 3.823663663663663e-06, "loss": 0.8277, "step": 436500 }, { "epoch": 0.47, "learning_rate": 3.7936336336336343e-06, "loss": 0.8357, "step": 437000 }, { "epoch": 0.47, "learning_rate": 3.763603603603604e-06, "loss": 0.8382, "step": 437500 }, { "epoch": 0.47, "learning_rate": 3.7335735735735737e-06, "loss": 0.8501, "step": 438000 }, { "epoch": 0.47, "learning_rate": 3.7035435435435434e-06, "loss": 0.8233, "step": 438500 }, { "epoch": 0.47, "learning_rate": 3.673513513513514e-06, "loss": 0.8342, "step": 439000 }, { "epoch": 0.47, "learning_rate": 3.6434834834834837e-06, "loss": 0.8502, "step": 439500 }, { "epoch": 0.47, "learning_rate": 3.6135135135135137e-06, "loss": 0.8418, "step": 440000 }, { "epoch": 0.47, "eval_loss": 0.7793955206871033, "eval_runtime": 566.9399, "eval_samples_per_second": 176.386, "eval_steps_per_second": 44.096, "step": 440000 }, { "epoch": 0.47, "learning_rate": 3.5834834834834834e-06, "loss": 0.8153, "step": 440500 }, { "epoch": 0.47, "learning_rate": 3.5534534534534536e-06, "loss": 0.8435, "step": 441000 }, { "epoch": 0.47, "learning_rate": 3.5234234234234237e-06, "loss": 0.8288, "step": 441500 }, { "epoch": 0.47, "learning_rate": 3.4933933933933934e-06, "loss": 0.8185, "step": 442000 }, { "epoch": 0.47, "learning_rate": 3.4633633633633635e-06, "loss": 0.819, "step": 442500 }, { "epoch": 0.48, "learning_rate": 3.4333333333333336e-06, "loss": 0.8296, "step": 443000 }, { "epoch": 0.48, "learning_rate": 3.4033033033033033e-06, "loss": 0.815, "step": 443500 }, { "epoch": 0.48, "learning_rate": 3.3733933933933933e-06, "loss": 0.8266, "step": 444000 }, { "epoch": 0.48, "learning_rate": 3.3433633633633634e-06, "loss": 0.8252, "step": 444500 }, { "epoch": 0.48, "learning_rate": 3.3133333333333335e-06, "loss": 0.8336, "step": 445000 }, { "epoch": 0.48, "learning_rate": 3.2833033033033036e-06, "loss": 0.8363, "step": 445500 }, { "epoch": 0.48, "learning_rate": 3.2532732732732733e-06, "loss": 0.8297, "step": 446000 }, { "epoch": 0.48, "learning_rate": 3.2233033033033034e-06, "loss": 0.8247, "step": 446500 }, { "epoch": 0.48, "learning_rate": 3.1932732732732735e-06, "loss": 0.8205, "step": 447000 }, { "epoch": 0.48, "learning_rate": 3.163243243243243e-06, "loss": 0.8311, "step": 447500 }, { "epoch": 0.48, "learning_rate": 3.1332132132132133e-06, "loss": 0.8398, "step": 448000 }, { "epoch": 0.48, "learning_rate": 3.103183183183183e-06, "loss": 0.8344, "step": 448500 }, { "epoch": 0.48, "learning_rate": 3.0731531531531536e-06, "loss": 0.8124, "step": 449000 }, { "epoch": 0.48, "learning_rate": 3.043183183183183e-06, "loss": 0.8206, "step": 449500 }, { "epoch": 0.48, "learning_rate": 3.0131531531531533e-06, "loss": 0.8271, "step": 450000 }, { "epoch": 0.48, "eval_loss": 0.7778518199920654, "eval_runtime": 574.7064, "eval_samples_per_second": 174.002, "eval_steps_per_second": 43.5, "step": 450000 }, { "epoch": 0.48, "learning_rate": 2.983123123123123e-06, "loss": 0.8326, "step": 450500 }, { "epoch": 0.48, "learning_rate": 2.953093093093093e-06, "loss": 0.8222, "step": 451000 }, { "epoch": 0.48, "learning_rate": 2.9230630630630633e-06, "loss": 0.8365, "step": 451500 }, { "epoch": 0.48, "learning_rate": 2.893033033033033e-06, "loss": 0.8341, "step": 452000 }, { "epoch": 0.49, "learning_rate": 2.863003003003003e-06, "loss": 0.8402, "step": 452500 }, { "epoch": 0.49, "learning_rate": 2.8329729729729732e-06, "loss": 0.8316, "step": 453000 }, { "epoch": 0.49, "learning_rate": 2.8030030030030032e-06, "loss": 0.8329, "step": 453500 }, { "epoch": 0.49, "learning_rate": 2.772972972972973e-06, "loss": 0.8108, "step": 454000 }, { "epoch": 0.49, "learning_rate": 2.7429429429429426e-06, "loss": 0.815, "step": 454500 }, { "epoch": 0.49, "learning_rate": 2.712912912912913e-06, "loss": 0.834, "step": 455000 }, { "epoch": 0.49, "learning_rate": 2.683003003003003e-06, "loss": 0.8178, "step": 455500 }, { "epoch": 0.49, "learning_rate": 2.652972972972973e-06, "loss": 0.8206, "step": 456000 }, { "epoch": 0.49, "learning_rate": 2.622942942942943e-06, "loss": 0.8338, "step": 456500 }, { "epoch": 0.49, "learning_rate": 2.592912912912913e-06, "loss": 0.825, "step": 457000 }, { "epoch": 0.49, "learning_rate": 2.5628828828828828e-06, "loss": 0.818, "step": 457500 }, { "epoch": 0.49, "learning_rate": 2.532852852852853e-06, "loss": 0.8246, "step": 458000 }, { "epoch": 0.49, "learning_rate": 2.502822822822823e-06, "loss": 0.8359, "step": 458500 }, { "epoch": 0.49, "learning_rate": 2.472852852852853e-06, "loss": 0.8253, "step": 459000 }, { "epoch": 0.49, "learning_rate": 2.4428228228228228e-06, "loss": 0.8227, "step": 459500 }, { "epoch": 0.49, "learning_rate": 2.412792792792793e-06, "loss": 0.8394, "step": 460000 }, { "epoch": 0.49, "eval_loss": 0.7761884331703186, "eval_runtime": 598.2224, "eval_samples_per_second": 167.162, "eval_steps_per_second": 41.79, "step": 460000 }, { "epoch": 0.49, "learning_rate": 2.382762762762763e-06, "loss": 0.8264, "step": 460500 }, { "epoch": 0.49, "learning_rate": 2.3527327327327327e-06, "loss": 0.8397, "step": 461000 }, { "epoch": 0.49, "learning_rate": 2.3227627627627627e-06, "loss": 0.8178, "step": 461500 }, { "epoch": 0.5, "learning_rate": 2.292732732732733e-06, "loss": 0.8224, "step": 462000 }, { "epoch": 0.5, "learning_rate": 2.262702702702703e-06, "loss": 0.8338, "step": 462500 }, { "epoch": 0.5, "learning_rate": 2.2326726726726727e-06, "loss": 0.8179, "step": 463000 }, { "epoch": 0.5, "learning_rate": 2.2027027027027027e-06, "loss": 0.8332, "step": 463500 }, { "epoch": 0.5, "learning_rate": 2.1726726726726724e-06, "loss": 0.8318, "step": 464000 }, { "epoch": 0.5, "learning_rate": 2.142642642642643e-06, "loss": 0.8298, "step": 464500 }, { "epoch": 0.5, "learning_rate": 2.1126126126126127e-06, "loss": 0.8298, "step": 465000 }, { "epoch": 0.5, "learning_rate": 2.082582582582583e-06, "loss": 0.818, "step": 465500 }, { "epoch": 0.5, "learning_rate": 2.0525525525525525e-06, "loss": 0.8202, "step": 466000 }, { "epoch": 0.5, "learning_rate": 2.0225225225225226e-06, "loss": 0.8174, "step": 466500 }, { "epoch": 0.5, "learning_rate": 1.9924924924924928e-06, "loss": 0.8165, "step": 467000 }, { "epoch": 0.5, "learning_rate": 1.9625225225225224e-06, "loss": 0.8184, "step": 467500 }, { "epoch": 0.5, "learning_rate": 1.9324924924924925e-06, "loss": 0.823, "step": 468000 }, { "epoch": 0.5, "learning_rate": 1.9024624624624624e-06, "loss": 0.8224, "step": 468500 }, { "epoch": 0.5, "learning_rate": 1.8724324324324325e-06, "loss": 0.8266, "step": 469000 }, { "epoch": 0.5, "learning_rate": 1.8424024024024024e-06, "loss": 0.8199, "step": 469500 }, { "epoch": 0.5, "learning_rate": 1.8123723723723726e-06, "loss": 0.8319, "step": 470000 }, { "epoch": 0.5, "eval_loss": 0.7738833427429199, "eval_runtime": 565.2199, "eval_samples_per_second": 176.922, "eval_steps_per_second": 44.231, "step": 470000 }, { "epoch": 0.5, "learning_rate": 1.7823423423423423e-06, "loss": 0.834, "step": 470500 }, { "epoch": 0.51, "learning_rate": 1.7523123123123124e-06, "loss": 0.8208, "step": 471000 }, { "epoch": 0.51, "learning_rate": 1.7223423423423424e-06, "loss": 0.8136, "step": 471500 }, { "epoch": 0.51, "learning_rate": 1.6923123123123123e-06, "loss": 0.8286, "step": 472000 }, { "epoch": 0.51, "learning_rate": 1.6622822822822823e-06, "loss": 0.8284, "step": 472500 }, { "epoch": 0.51, "learning_rate": 1.6323123123123125e-06, "loss": 0.8276, "step": 473000 }, { "epoch": 0.51, "learning_rate": 1.6022822822822822e-06, "loss": 0.8387, "step": 473500 }, { "epoch": 0.51, "learning_rate": 1.5722522522522523e-06, "loss": 0.825, "step": 474000 }, { "epoch": 0.51, "learning_rate": 1.5422222222222222e-06, "loss": 0.8339, "step": 474500 }, { "epoch": 0.51, "learning_rate": 1.5121921921921924e-06, "loss": 0.8279, "step": 475000 }, { "epoch": 0.51, "learning_rate": 1.482162162162162e-06, "loss": 0.8162, "step": 475500 }, { "epoch": 0.51, "learning_rate": 1.4521321321321322e-06, "loss": 0.8312, "step": 476000 }, { "epoch": 0.51, "learning_rate": 1.422102102102102e-06, "loss": 0.826, "step": 476500 }, { "epoch": 0.51, "learning_rate": 1.3921321321321321e-06, "loss": 0.8415, "step": 477000 }, { "epoch": 0.51, "learning_rate": 1.362102102102102e-06, "loss": 0.8175, "step": 477500 }, { "epoch": 0.51, "learning_rate": 1.3320720720720722e-06, "loss": 0.8104, "step": 478000 }, { "epoch": 0.51, "learning_rate": 1.302042042042042e-06, "loss": 0.8368, "step": 478500 }, { "epoch": 0.51, "learning_rate": 1.272012012012012e-06, "loss": 0.8226, "step": 479000 }, { "epoch": 0.51, "learning_rate": 1.242042042042042e-06, "loss": 0.819, "step": 479500 }, { "epoch": 0.51, "learning_rate": 1.2120120120120121e-06, "loss": 0.8125, "step": 480000 }, { "epoch": 0.51, "eval_loss": 0.7732232809066772, "eval_runtime": 573.449, "eval_samples_per_second": 174.383, "eval_steps_per_second": 43.596, "step": 480000 }, { "epoch": 0.52, "learning_rate": 1.1819819819819819e-06, "loss": 0.8312, "step": 480500 }, { "epoch": 0.52, "learning_rate": 1.151951951951952e-06, "loss": 0.8299, "step": 481000 }, { "epoch": 0.52, "learning_rate": 1.121981981981982e-06, "loss": 0.8081, "step": 481500 }, { "epoch": 0.52, "learning_rate": 1.091951951951952e-06, "loss": 0.8267, "step": 482000 }, { "epoch": 0.52, "learning_rate": 1.061981981981982e-06, "loss": 0.8213, "step": 482500 }, { "epoch": 0.52, "learning_rate": 1.031951951951952e-06, "loss": 0.8024, "step": 483000 }, { "epoch": 0.52, "learning_rate": 1.0019219219219218e-06, "loss": 0.8223, "step": 483500 }, { "epoch": 0.52, "learning_rate": 9.71891891891892e-07, "loss": 0.8254, "step": 484000 }, { "epoch": 0.52, "learning_rate": 9.418618618618619e-07, "loss": 0.8258, "step": 484500 }, { "epoch": 0.52, "learning_rate": 9.118318318318318e-07, "loss": 0.8303, "step": 485000 }, { "epoch": 0.52, "learning_rate": 8.818018018018019e-07, "loss": 0.8197, "step": 485500 }, { "epoch": 0.52, "learning_rate": 8.517717717717718e-07, "loss": 0.824, "step": 486000 }, { "epoch": 0.52, "learning_rate": 8.218018018018018e-07, "loss": 0.8255, "step": 486500 }, { "epoch": 0.52, "learning_rate": 7.917717717717718e-07, "loss": 0.8235, "step": 487000 }, { "epoch": 0.52, "learning_rate": 7.618018018018018e-07, "loss": 0.8036, "step": 487500 }, { "epoch": 0.52, "learning_rate": 7.317717717717718e-07, "loss": 0.818, "step": 488000 }, { "epoch": 0.52, "learning_rate": 7.017417417417418e-07, "loss": 0.8402, "step": 488500 }, { "epoch": 0.52, "learning_rate": 6.717117117117117e-07, "loss": 0.8439, "step": 489000 }, { "epoch": 0.52, "learning_rate": 6.416816816816817e-07, "loss": 0.8121, "step": 489500 }, { "epoch": 0.53, "learning_rate": 6.116516516516516e-07, "loss": 0.8065, "step": 490000 }, { "epoch": 0.53, "eval_loss": 0.7721747756004333, "eval_runtime": 582.9907, "eval_samples_per_second": 171.529, "eval_steps_per_second": 42.882, "step": 490000 }, { "epoch": 0.53, "learning_rate": 5.816216216216216e-07, "loss": 0.827, "step": 490500 }, { "epoch": 0.53, "learning_rate": 5.515915915915916e-07, "loss": 0.8138, "step": 491000 }, { "epoch": 0.53, "learning_rate": 5.216216216216216e-07, "loss": 0.8212, "step": 491500 }, { "epoch": 0.53, "learning_rate": 4.915915915915916e-07, "loss": 0.8166, "step": 492000 }, { "epoch": 0.53, "learning_rate": 4.6156156156156157e-07, "loss": 0.8246, "step": 492500 }, { "epoch": 0.53, "learning_rate": 4.3153153153153154e-07, "loss": 0.821, "step": 493000 }, { "epoch": 0.53, "learning_rate": 4.015015015015015e-07, "loss": 0.8265, "step": 493500 }, { "epoch": 0.53, "learning_rate": 3.7153153153153153e-07, "loss": 0.8297, "step": 494000 }, { "epoch": 0.53, "learning_rate": 3.415015015015015e-07, "loss": 0.8053, "step": 494500 }, { "epoch": 0.53, "learning_rate": 3.1147147147147147e-07, "loss": 0.8171, "step": 495000 }, { "epoch": 0.53, "learning_rate": 2.8144144144144143e-07, "loss": 0.8261, "step": 495500 }, { "epoch": 0.53, "learning_rate": 2.5147147147147146e-07, "loss": 0.8216, "step": 496000 }, { "epoch": 0.53, "learning_rate": 2.2144144144144145e-07, "loss": 0.8195, "step": 496500 }, { "epoch": 0.53, "learning_rate": 1.9141141141141142e-07, "loss": 0.8303, "step": 497000 }, { "epoch": 0.53, "learning_rate": 1.613813813813814e-07, "loss": 0.836, "step": 497500 }, { "epoch": 0.53, "learning_rate": 1.3135135135135136e-07, "loss": 0.833, "step": 498000 }, { "epoch": 0.53, "learning_rate": 1.0132132132132131e-07, "loss": 0.8264, "step": 498500 }, { "epoch": 0.54, "learning_rate": 7.129129129129129e-08, "loss": 0.8173, "step": 499000 }, { "epoch": 0.54, "learning_rate": 4.1261261261261266e-08, "loss": 0.8153, "step": 499500 }, { "epoch": 0.54, "learning_rate": 1.1291291291291292e-08, "loss": 0.8383, "step": 500000 }, { "epoch": 0.54, "eval_loss": 0.7712512016296387, "eval_runtime": 603.0742, "eval_samples_per_second": 165.817, "eval_steps_per_second": 41.454, "step": 500000 }, { "epoch": 0.54, "learning_rate": 1.4998139069534767e-05, "loss": 0.8465, "step": 500500 }, { "epoch": 0.54, "learning_rate": 1.4983131565782893e-05, "loss": 0.8468, "step": 501000 }, { "epoch": 0.54, "learning_rate": 1.4968124062031015e-05, "loss": 0.8479, "step": 501500 }, { "epoch": 0.54, "learning_rate": 1.495311655827914e-05, "loss": 0.8614, "step": 502000 }, { "epoch": 0.54, "learning_rate": 1.4938139069534766e-05, "loss": 0.8439, "step": 502500 }, { "epoch": 0.54, "learning_rate": 1.4923131565782892e-05, "loss": 0.8538, "step": 503000 }, { "epoch": 0.54, "learning_rate": 1.4908124062031016e-05, "loss": 0.8375, "step": 503500 }, { "epoch": 0.54, "learning_rate": 1.4893116558279141e-05, "loss": 0.8551, "step": 504000 }, { "epoch": 0.54, "learning_rate": 1.4878109054527264e-05, "loss": 0.8507, "step": 504500 }, { "epoch": 0.54, "learning_rate": 1.4863101550775387e-05, "loss": 0.8539, "step": 505000 }, { "epoch": 0.54, "learning_rate": 1.4848094047023513e-05, "loss": 0.8515, "step": 505500 }, { "epoch": 0.54, "learning_rate": 1.483311655827914e-05, "loss": 0.8389, "step": 506000 }, { "epoch": 0.54, "learning_rate": 1.4818109054527264e-05, "loss": 0.8546, "step": 506500 }, { "epoch": 0.54, "learning_rate": 1.4803101550775388e-05, "loss": 0.844, "step": 507000 }, { "epoch": 0.54, "learning_rate": 1.4788094047023512e-05, "loss": 0.8389, "step": 507500 }, { "epoch": 0.54, "learning_rate": 1.4773086543271636e-05, "loss": 0.8542, "step": 508000 }, { "epoch": 0.55, "learning_rate": 1.475807903951976e-05, "loss": 0.8595, "step": 508500 }, { "epoch": 0.55, "learning_rate": 1.4743071535767885e-05, "loss": 0.8535, "step": 509000 }, { "epoch": 0.55, "learning_rate": 1.4728064032016008e-05, "loss": 0.8618, "step": 509500 }, { "epoch": 0.55, "learning_rate": 1.4713086543271636e-05, "loss": 0.8531, "step": 510000 }, { "epoch": 0.55, "eval_loss": 0.7975139021873474, "eval_runtime": 604.9535, "eval_samples_per_second": 165.302, "eval_steps_per_second": 41.325, "step": 510000 }, { "epoch": 0.55, "learning_rate": 1.4698079039519759e-05, "loss": 0.844, "step": 510500 }, { "epoch": 0.55, "learning_rate": 1.4683071535767884e-05, "loss": 0.8603, "step": 511000 }, { "epoch": 0.55, "learning_rate": 1.4668064032016008e-05, "loss": 0.8658, "step": 511500 }, { "epoch": 0.55, "learning_rate": 1.4653056528264134e-05, "loss": 0.8574, "step": 512000 }, { "epoch": 0.55, "learning_rate": 1.463807903951976e-05, "loss": 0.8587, "step": 512500 }, { "epoch": 0.55, "learning_rate": 1.4623071535767885e-05, "loss": 0.8496, "step": 513000 }, { "epoch": 0.55, "learning_rate": 1.4608064032016009e-05, "loss": 0.8558, "step": 513500 }, { "epoch": 0.55, "learning_rate": 1.4593056528264133e-05, "loss": 0.8604, "step": 514000 }, { "epoch": 0.55, "learning_rate": 1.4578079039519762e-05, "loss": 0.8619, "step": 514500 }, { "epoch": 0.55, "learning_rate": 1.4563101550775387e-05, "loss": 0.8637, "step": 515000 }, { "epoch": 0.55, "learning_rate": 1.4548094047023513e-05, "loss": 0.8512, "step": 515500 }, { "epoch": 0.55, "learning_rate": 1.4533086543271637e-05, "loss": 0.8522, "step": 516000 }, { "epoch": 0.55, "learning_rate": 1.451807903951976e-05, "loss": 0.8531, "step": 516500 }, { "epoch": 0.55, "learning_rate": 1.4503071535767884e-05, "loss": 0.859, "step": 517000 }, { "epoch": 0.55, "learning_rate": 1.4488064032016008e-05, "loss": 0.8399, "step": 517500 }, { "epoch": 0.56, "learning_rate": 1.4473056528264132e-05, "loss": 0.8597, "step": 518000 }, { "epoch": 0.56, "learning_rate": 1.4458049024512256e-05, "loss": 0.8739, "step": 518500 }, { "epoch": 0.56, "learning_rate": 1.4443071535767885e-05, "loss": 0.8528, "step": 519000 }, { "epoch": 0.56, "learning_rate": 1.4428064032016007e-05, "loss": 0.8613, "step": 519500 }, { "epoch": 0.56, "learning_rate": 1.4413056528264133e-05, "loss": 0.8647, "step": 520000 }, { "epoch": 0.56, "eval_loss": 0.8006455302238464, "eval_runtime": 614.931, "eval_samples_per_second": 162.62, "eval_steps_per_second": 40.655, "step": 520000 }, { "epoch": 0.56, "learning_rate": 1.4398049024512257e-05, "loss": 0.8663, "step": 520500 }, { "epoch": 0.56, "learning_rate": 1.4383071535767884e-05, "loss": 0.8476, "step": 521000 }, { "epoch": 0.56, "learning_rate": 1.4368064032016008e-05, "loss": 0.8491, "step": 521500 }, { "epoch": 0.56, "learning_rate": 1.4353056528264133e-05, "loss": 0.8642, "step": 522000 }, { "epoch": 0.56, "learning_rate": 1.4338049024512256e-05, "loss": 0.8615, "step": 522500 }, { "epoch": 0.56, "learning_rate": 1.4323071535767884e-05, "loss": 0.8509, "step": 523000 }, { "epoch": 0.56, "learning_rate": 1.4308064032016008e-05, "loss": 0.8413, "step": 523500 }, { "epoch": 0.56, "learning_rate": 1.4293056528264132e-05, "loss": 0.8668, "step": 524000 }, { "epoch": 0.56, "learning_rate": 1.4278049024512256e-05, "loss": 0.8579, "step": 524500 }, { "epoch": 0.56, "learning_rate": 1.426304152076038e-05, "loss": 0.8592, "step": 525000 }, { "epoch": 0.56, "learning_rate": 1.4248034017008506e-05, "loss": 0.8657, "step": 525500 }, { "epoch": 0.56, "learning_rate": 1.4233026513256628e-05, "loss": 0.8439, "step": 526000 }, { "epoch": 0.56, "learning_rate": 1.4218019009504754e-05, "loss": 0.8613, "step": 526500 }, { "epoch": 0.57, "learning_rate": 1.4203041520760379e-05, "loss": 0.8532, "step": 527000 }, { "epoch": 0.57, "learning_rate": 1.4188034017008505e-05, "loss": 0.8533, "step": 527500 }, { "epoch": 0.57, "learning_rate": 1.4173026513256629e-05, "loss": 0.8421, "step": 528000 }, { "epoch": 0.57, "learning_rate": 1.4158019009504754e-05, "loss": 0.8463, "step": 528500 }, { "epoch": 0.57, "learning_rate": 1.4143041520760381e-05, "loss": 0.8644, "step": 529000 }, { "epoch": 0.57, "learning_rate": 1.4128034017008505e-05, "loss": 0.8666, "step": 529500 }, { "epoch": 0.57, "learning_rate": 1.4113056528264132e-05, "loss": 0.8528, "step": 530000 }, { "epoch": 0.57, "eval_loss": 0.7983748316764832, "eval_runtime": 645.3764, "eval_samples_per_second": 154.948, "eval_steps_per_second": 38.737, "step": 530000 }, { "epoch": 0.57, "learning_rate": 1.4098049024512256e-05, "loss": 0.8359, "step": 530500 }, { "epoch": 0.57, "learning_rate": 1.4083041520760382e-05, "loss": 0.8603, "step": 531000 }, { "epoch": 0.57, "learning_rate": 1.4068034017008504e-05, "loss": 0.8479, "step": 531500 }, { "epoch": 0.57, "learning_rate": 1.4053026513256628e-05, "loss": 0.8581, "step": 532000 }, { "epoch": 0.57, "learning_rate": 1.4038019009504754e-05, "loss": 0.8553, "step": 532500 }, { "epoch": 0.57, "learning_rate": 1.402304152076038e-05, "loss": 0.8564, "step": 533000 }, { "epoch": 0.57, "learning_rate": 1.4008034017008505e-05, "loss": 0.855, "step": 533500 }, { "epoch": 0.57, "learning_rate": 1.3993026513256629e-05, "loss": 0.8573, "step": 534000 }, { "epoch": 0.57, "learning_rate": 1.3978019009504753e-05, "loss": 0.8689, "step": 534500 }, { "epoch": 0.57, "learning_rate": 1.3963041520760381e-05, "loss": 0.8597, "step": 535000 }, { "epoch": 0.57, "learning_rate": 1.3948034017008505e-05, "loss": 0.8429, "step": 535500 }, { "epoch": 0.57, "learning_rate": 1.3933026513256627e-05, "loss": 0.8561, "step": 536000 }, { "epoch": 0.58, "learning_rate": 1.3918019009504753e-05, "loss": 0.8651, "step": 536500 }, { "epoch": 0.58, "learning_rate": 1.3903011505752877e-05, "loss": 0.8675, "step": 537000 }, { "epoch": 0.58, "learning_rate": 1.3888004002001001e-05, "loss": 0.8466, "step": 537500 }, { "epoch": 0.58, "learning_rate": 1.3872996498249125e-05, "loss": 0.8629, "step": 538000 }, { "epoch": 0.58, "learning_rate": 1.3857988994497249e-05, "loss": 0.8458, "step": 538500 }, { "epoch": 0.58, "learning_rate": 1.3843011505752876e-05, "loss": 0.8502, "step": 539000 }, { "epoch": 0.58, "learning_rate": 1.3828004002001e-05, "loss": 0.8528, "step": 539500 }, { "epoch": 0.58, "learning_rate": 1.3812996498249125e-05, "loss": 0.8557, "step": 540000 }, { "epoch": 0.58, "eval_loss": 0.7993029952049255, "eval_runtime": 619.1901, "eval_samples_per_second": 161.501, "eval_steps_per_second": 40.375, "step": 540000 }, { "epoch": 0.58, "learning_rate": 1.379798899449725e-05, "loss": 0.8428, "step": 540500 }, { "epoch": 0.58, "learning_rate": 1.3783011505752876e-05, "loss": 0.8375, "step": 541000 }, { "epoch": 0.58, "learning_rate": 1.3768004002001e-05, "loss": 0.8569, "step": 541500 }, { "epoch": 0.58, "learning_rate": 1.3752996498249126e-05, "loss": 0.8465, "step": 542000 }, { "epoch": 0.58, "learning_rate": 1.3738019009504753e-05, "loss": 0.8545, "step": 542500 }, { "epoch": 0.58, "learning_rate": 1.3723011505752877e-05, "loss": 0.8584, "step": 543000 }, { "epoch": 0.58, "learning_rate": 1.3708004002001001e-05, "loss": 0.855, "step": 543500 }, { "epoch": 0.58, "learning_rate": 1.3692996498249125e-05, "loss": 0.8386, "step": 544000 }, { "epoch": 0.58, "learning_rate": 1.3677988994497249e-05, "loss": 0.8451, "step": 544500 }, { "epoch": 0.58, "learning_rate": 1.3663011505752876e-05, "loss": 0.8503, "step": 545000 }, { "epoch": 0.58, "learning_rate": 1.3648004002001002e-05, "loss": 0.8647, "step": 545500 }, { "epoch": 0.59, "learning_rate": 1.3632996498249125e-05, "loss": 0.8711, "step": 546000 }, { "epoch": 0.59, "learning_rate": 1.3617988994497248e-05, "loss": 0.8475, "step": 546500 }, { "epoch": 0.59, "learning_rate": 1.3602981490745373e-05, "loss": 0.8646, "step": 547000 }, { "epoch": 0.59, "learning_rate": 1.3587973986993497e-05, "loss": 0.8595, "step": 547500 }, { "epoch": 0.59, "learning_rate": 1.3572966483241621e-05, "loss": 0.8466, "step": 548000 }, { "epoch": 0.59, "learning_rate": 1.3557958979489745e-05, "loss": 0.8538, "step": 548500 }, { "epoch": 0.59, "learning_rate": 1.3542981490745374e-05, "loss": 0.849, "step": 549000 }, { "epoch": 0.59, "learning_rate": 1.3527973986993498e-05, "loss": 0.8513, "step": 549500 }, { "epoch": 0.59, "learning_rate": 1.351296648324162e-05, "loss": 0.8447, "step": 550000 }, { "epoch": 0.59, "eval_loss": 0.7979006171226501, "eval_runtime": 598.1411, "eval_samples_per_second": 167.185, "eval_steps_per_second": 41.796, "step": 550000 }, { "epoch": 0.59, "learning_rate": 1.3497958979489746e-05, "loss": 0.839, "step": 550500 }, { "epoch": 0.59, "learning_rate": 1.3482981490745373e-05, "loss": 0.8704, "step": 551000 }, { "epoch": 0.59, "learning_rate": 1.3467973986993497e-05, "loss": 0.8562, "step": 551500 }, { "epoch": 0.59, "learning_rate": 1.345296648324162e-05, "loss": 0.8529, "step": 552000 }, { "epoch": 0.59, "learning_rate": 1.3437958979489746e-05, "loss": 0.8441, "step": 552500 }, { "epoch": 0.59, "learning_rate": 1.3422981490745373e-05, "loss": 0.856, "step": 553000 }, { "epoch": 0.59, "learning_rate": 1.3407973986993497e-05, "loss": 0.849, "step": 553500 }, { "epoch": 0.59, "learning_rate": 1.3392966483241621e-05, "loss": 0.8478, "step": 554000 }, { "epoch": 0.59, "learning_rate": 1.3377958979489745e-05, "loss": 0.8524, "step": 554500 }, { "epoch": 0.6, "learning_rate": 1.3362951475737869e-05, "loss": 0.8477, "step": 555000 }, { "epoch": 0.6, "learning_rate": 1.3347943971985995e-05, "loss": 0.8488, "step": 555500 }, { "epoch": 0.6, "learning_rate": 1.3332996498249125e-05, "loss": 0.8426, "step": 556000 }, { "epoch": 0.6, "learning_rate": 1.3317988994497249e-05, "loss": 0.8702, "step": 556500 }, { "epoch": 0.6, "learning_rate": 1.3302981490745373e-05, "loss": 0.854, "step": 557000 }, { "epoch": 0.6, "learning_rate": 1.3287973986993497e-05, "loss": 0.8539, "step": 557500 }, { "epoch": 0.6, "learning_rate": 1.327296648324162e-05, "loss": 0.8529, "step": 558000 }, { "epoch": 0.6, "learning_rate": 1.3257958979489745e-05, "loss": 0.8548, "step": 558500 }, { "epoch": 0.6, "learning_rate": 1.3242951475737869e-05, "loss": 0.8587, "step": 559000 }, { "epoch": 0.6, "learning_rate": 1.3227943971985994e-05, "loss": 0.8469, "step": 559500 }, { "epoch": 0.6, "learning_rate": 1.3212936468234118e-05, "loss": 0.8529, "step": 560000 }, { "epoch": 0.6, "eval_loss": 0.7950595021247864, "eval_runtime": 602.8951, "eval_samples_per_second": 165.866, "eval_steps_per_second": 41.467, "step": 560000 }, { "epoch": 0.6, "learning_rate": 1.319792896448224e-05, "loss": 0.8347, "step": 560500 }, { "epoch": 0.6, "learning_rate": 1.3182921460730366e-05, "loss": 0.8425, "step": 561000 }, { "epoch": 0.6, "learning_rate": 1.316791395697849e-05, "loss": 0.8658, "step": 561500 }, { "epoch": 0.6, "learning_rate": 1.3152936468234117e-05, "loss": 0.8605, "step": 562000 }, { "epoch": 0.6, "learning_rate": 1.3137928964482241e-05, "loss": 0.8643, "step": 562500 }, { "epoch": 0.6, "learning_rate": 1.3122921460730367e-05, "loss": 0.8543, "step": 563000 }, { "epoch": 0.6, "learning_rate": 1.3107913956978489e-05, "loss": 0.8436, "step": 563500 }, { "epoch": 0.6, "learning_rate": 1.3092906453226613e-05, "loss": 0.8369, "step": 564000 }, { "epoch": 0.61, "learning_rate": 1.3077928964482241e-05, "loss": 0.8563, "step": 564500 }, { "epoch": 0.61, "learning_rate": 1.3062921460730365e-05, "loss": 0.8431, "step": 565000 }, { "epoch": 0.61, "learning_rate": 1.304791395697849e-05, "loss": 0.8434, "step": 565500 }, { "epoch": 0.61, "learning_rate": 1.3032906453226613e-05, "loss": 0.8527, "step": 566000 }, { "epoch": 0.61, "learning_rate": 1.3017898949474737e-05, "loss": 0.8626, "step": 566500 }, { "epoch": 0.61, "learning_rate": 1.3002891445722861e-05, "loss": 0.8644, "step": 567000 }, { "epoch": 0.61, "learning_rate": 1.2987883941970987e-05, "loss": 0.8478, "step": 567500 }, { "epoch": 0.61, "learning_rate": 1.297287643821911e-05, "loss": 0.8429, "step": 568000 }, { "epoch": 0.61, "learning_rate": 1.2957898949474738e-05, "loss": 0.8492, "step": 568500 }, { "epoch": 0.61, "learning_rate": 1.2942891445722862e-05, "loss": 0.8498, "step": 569000 }, { "epoch": 0.61, "learning_rate": 1.2927883941970987e-05, "loss": 0.8469, "step": 569500 }, { "epoch": 0.61, "learning_rate": 1.2912906453226614e-05, "loss": 0.8568, "step": 570000 }, { "epoch": 0.61, "eval_loss": 0.7931195497512817, "eval_runtime": 601.1433, "eval_samples_per_second": 166.35, "eval_steps_per_second": 41.587, "step": 570000 }, { "epoch": 0.61, "learning_rate": 1.2897898949474738e-05, "loss": 0.8439, "step": 570500 }, { "epoch": 0.61, "learning_rate": 1.288289144572286e-05, "loss": 0.8329, "step": 571000 }, { "epoch": 0.61, "learning_rate": 1.2867883941970986e-05, "loss": 0.864, "step": 571500 }, { "epoch": 0.61, "learning_rate": 1.285287643821911e-05, "loss": 0.8517, "step": 572000 }, { "epoch": 0.61, "learning_rate": 1.2837898949474737e-05, "loss": 0.8559, "step": 572500 }, { "epoch": 0.61, "learning_rate": 1.2822891445722861e-05, "loss": 0.8606, "step": 573000 }, { "epoch": 0.61, "learning_rate": 1.280791395697849e-05, "loss": 0.8449, "step": 573500 }, { "epoch": 0.62, "learning_rate": 1.2792906453226614e-05, "loss": 0.8377, "step": 574000 }, { "epoch": 0.62, "learning_rate": 1.2777898949474738e-05, "loss": 0.8525, "step": 574500 }, { "epoch": 0.62, "learning_rate": 1.2762891445722862e-05, "loss": 0.8573, "step": 575000 }, { "epoch": 0.62, "learning_rate": 1.2747883941970986e-05, "loss": 0.851, "step": 575500 }, { "epoch": 0.62, "learning_rate": 1.273287643821911e-05, "loss": 0.8596, "step": 576000 }, { "epoch": 0.62, "learning_rate": 1.2717898949474738e-05, "loss": 0.8489, "step": 576500 }, { "epoch": 0.62, "learning_rate": 1.270289144572286e-05, "loss": 0.8385, "step": 577000 }, { "epoch": 0.62, "learning_rate": 1.2687883941970986e-05, "loss": 0.839, "step": 577500 }, { "epoch": 0.62, "learning_rate": 1.267287643821911e-05, "loss": 0.8542, "step": 578000 }, { "epoch": 0.62, "learning_rate": 1.2657868934467232e-05, "loss": 0.8386, "step": 578500 }, { "epoch": 0.62, "learning_rate": 1.2642861430715358e-05, "loss": 0.8556, "step": 579000 }, { "epoch": 0.62, "learning_rate": 1.2627853926963482e-05, "loss": 0.8598, "step": 579500 }, { "epoch": 0.62, "learning_rate": 1.2612846423211606e-05, "loss": 0.858, "step": 580000 }, { "epoch": 0.62, "eval_loss": 0.7923389077186584, "eval_runtime": 603.6948, "eval_samples_per_second": 165.647, "eval_steps_per_second": 41.412, "step": 580000 }, { "epoch": 0.62, "learning_rate": 1.259783891945973e-05, "loss": 0.8411, "step": 580500 }, { "epoch": 0.62, "learning_rate": 1.2582831415707854e-05, "loss": 0.8443, "step": 581000 }, { "epoch": 0.62, "learning_rate": 1.2567853926963483e-05, "loss": 0.8564, "step": 581500 }, { "epoch": 0.62, "learning_rate": 1.2552846423211606e-05, "loss": 0.8485, "step": 582000 }, { "epoch": 0.62, "learning_rate": 1.253783891945973e-05, "loss": 0.8542, "step": 582500 }, { "epoch": 0.63, "learning_rate": 1.2522831415707854e-05, "loss": 0.846, "step": 583000 }, { "epoch": 0.63, "learning_rate": 1.2507823911955978e-05, "loss": 0.8492, "step": 583500 }, { "epoch": 0.63, "learning_rate": 1.2492816408204102e-05, "loss": 0.8476, "step": 584000 }, { "epoch": 0.63, "learning_rate": 1.2477808904452226e-05, "loss": 0.85, "step": 584500 }, { "epoch": 0.63, "learning_rate": 1.2462801400700352e-05, "loss": 0.8449, "step": 585000 }, { "epoch": 0.63, "learning_rate": 1.2447853926963482e-05, "loss": 0.8547, "step": 585500 }, { "epoch": 0.63, "learning_rate": 1.2432846423211608e-05, "loss": 0.835, "step": 586000 }, { "epoch": 0.63, "learning_rate": 1.241783891945973e-05, "loss": 0.8502, "step": 586500 }, { "epoch": 0.63, "learning_rate": 1.2402831415707854e-05, "loss": 0.8596, "step": 587000 }, { "epoch": 0.63, "learning_rate": 1.238782391195598e-05, "loss": 0.8586, "step": 587500 }, { "epoch": 0.63, "learning_rate": 1.2372816408204102e-05, "loss": 0.8478, "step": 588000 }, { "epoch": 0.63, "learning_rate": 1.2357808904452226e-05, "loss": 0.8472, "step": 588500 }, { "epoch": 0.63, "learning_rate": 1.2342831415707854e-05, "loss": 0.8523, "step": 589000 }, { "epoch": 0.63, "learning_rate": 1.2327823911955978e-05, "loss": 0.8395, "step": 589500 }, { "epoch": 0.63, "learning_rate": 1.2312816408204102e-05, "loss": 0.8573, "step": 590000 }, { "epoch": 0.63, "eval_loss": 0.7895151376724243, "eval_runtime": 593.5374, "eval_samples_per_second": 168.481, "eval_steps_per_second": 42.12, "step": 590000 }, { "epoch": 0.63, "learning_rate": 1.2297808904452226e-05, "loss": 0.8431, "step": 590500 }, { "epoch": 0.63, "learning_rate": 1.228280140070035e-05, "loss": 0.8416, "step": 591000 }, { "epoch": 0.63, "learning_rate": 1.2267793896948474e-05, "loss": 0.8563, "step": 591500 }, { "epoch": 0.63, "learning_rate": 1.22527863931966e-05, "loss": 0.8352, "step": 592000 }, { "epoch": 0.64, "learning_rate": 1.2237778889444724e-05, "loss": 0.8399, "step": 592500 }, { "epoch": 0.64, "learning_rate": 1.222280140070035e-05, "loss": 0.841, "step": 593000 }, { "epoch": 0.64, "learning_rate": 1.220782391195598e-05, "loss": 0.8455, "step": 593500 }, { "epoch": 0.64, "learning_rate": 1.2192816408204102e-05, "loss": 0.8493, "step": 594000 }, { "epoch": 0.64, "learning_rate": 1.2177808904452226e-05, "loss": 0.8465, "step": 594500 }, { "epoch": 0.64, "learning_rate": 1.2162801400700351e-05, "loss": 0.8478, "step": 595000 }, { "epoch": 0.64, "learning_rate": 1.2147793896948473e-05, "loss": 0.8447, "step": 595500 }, { "epoch": 0.64, "learning_rate": 1.2132786393196599e-05, "loss": 0.8333, "step": 596000 }, { "epoch": 0.64, "learning_rate": 1.2117778889444723e-05, "loss": 0.8496, "step": 596500 }, { "epoch": 0.64, "learning_rate": 1.2102771385692847e-05, "loss": 0.8357, "step": 597000 }, { "epoch": 0.64, "learning_rate": 1.2087793896948474e-05, "loss": 0.8534, "step": 597500 }, { "epoch": 0.64, "learning_rate": 1.20727863931966e-05, "loss": 0.8443, "step": 598000 }, { "epoch": 0.64, "learning_rate": 1.2057778889444722e-05, "loss": 0.8482, "step": 598500 }, { "epoch": 0.64, "learning_rate": 1.2042771385692846e-05, "loss": 0.8462, "step": 599000 }, { "epoch": 0.64, "learning_rate": 1.2027823911955978e-05, "loss": 0.8487, "step": 599500 }, { "epoch": 0.64, "learning_rate": 1.2012816408204102e-05, "loss": 0.8527, "step": 600000 }, { "epoch": 0.64, "eval_loss": 0.7874695062637329, "eval_runtime": 597.2742, "eval_samples_per_second": 167.427, "eval_steps_per_second": 41.857, "step": 600000 }, { "epoch": 0.64, "learning_rate": 1.1997808904452227e-05, "loss": 0.8327, "step": 600500 }, { "epoch": 0.64, "learning_rate": 1.1982801400700351e-05, "loss": 0.8275, "step": 601000 }, { "epoch": 0.65, "learning_rate": 1.1967793896948473e-05, "loss": 0.8332, "step": 601500 }, { "epoch": 0.65, "learning_rate": 1.1952786393196599e-05, "loss": 0.8389, "step": 602000 }, { "epoch": 0.65, "learning_rate": 1.1937778889444723e-05, "loss": 0.8557, "step": 602500 }, { "epoch": 0.65, "learning_rate": 1.1922771385692845e-05, "loss": 0.8465, "step": 603000 }, { "epoch": 0.65, "learning_rate": 1.1907793896948474e-05, "loss": 0.8416, "step": 603500 }, { "epoch": 0.65, "learning_rate": 1.18927863931966e-05, "loss": 0.8341, "step": 604000 }, { "epoch": 0.65, "learning_rate": 1.1877778889444722e-05, "loss": 0.8451, "step": 604500 }, { "epoch": 0.65, "learning_rate": 1.1862771385692846e-05, "loss": 0.8517, "step": 605000 }, { "epoch": 0.65, "learning_rate": 1.1847763881940971e-05, "loss": 0.8417, "step": 605500 }, { "epoch": 0.65, "learning_rate": 1.1832756378189095e-05, "loss": 0.8435, "step": 606000 }, { "epoch": 0.65, "learning_rate": 1.181774887443722e-05, "loss": 0.855, "step": 606500 }, { "epoch": 0.65, "learning_rate": 1.1802741370685343e-05, "loss": 0.8568, "step": 607000 }, { "epoch": 0.65, "learning_rate": 1.1787793896948474e-05, "loss": 0.8465, "step": 607500 }, { "epoch": 0.65, "learning_rate": 1.1772786393196599e-05, "loss": 0.8427, "step": 608000 }, { "epoch": 0.65, "learning_rate": 1.1757778889444723e-05, "loss": 0.8507, "step": 608500 }, { "epoch": 0.65, "learning_rate": 1.1742771385692845e-05, "loss": 0.8348, "step": 609000 }, { "epoch": 0.65, "learning_rate": 1.1727763881940971e-05, "loss": 0.8327, "step": 609500 }, { "epoch": 0.65, "learning_rate": 1.1712756378189095e-05, "loss": 0.8321, "step": 610000 }, { "epoch": 0.65, "eval_loss": 0.7858200073242188, "eval_runtime": 639.6935, "eval_samples_per_second": 156.325, "eval_steps_per_second": 39.081, "step": 610000 }, { "epoch": 0.65, "learning_rate": 1.1697748874437219e-05, "loss": 0.8321, "step": 610500 }, { "epoch": 0.66, "learning_rate": 1.1682741370685343e-05, "loss": 0.8484, "step": 611000 }, { "epoch": 0.66, "learning_rate": 1.1667733866933467e-05, "loss": 0.8498, "step": 611500 }, { "epoch": 0.66, "learning_rate": 1.1652726363181592e-05, "loss": 0.8476, "step": 612000 }, { "epoch": 0.66, "learning_rate": 1.1637718859429715e-05, "loss": 0.8507, "step": 612500 }, { "epoch": 0.66, "learning_rate": 1.1622711355677838e-05, "loss": 0.8308, "step": 613000 }, { "epoch": 0.66, "learning_rate": 1.1607733866933467e-05, "loss": 0.8443, "step": 613500 }, { "epoch": 0.66, "learning_rate": 1.1592726363181591e-05, "loss": 0.8424, "step": 614000 }, { "epoch": 0.66, "learning_rate": 1.1577718859429715e-05, "loss": 0.8445, "step": 614500 }, { "epoch": 0.66, "learning_rate": 1.1562711355677839e-05, "loss": 0.8503, "step": 615000 }, { "epoch": 0.66, "learning_rate": 1.1547703851925963e-05, "loss": 0.8312, "step": 615500 }, { "epoch": 0.66, "learning_rate": 1.1532696348174087e-05, "loss": 0.8474, "step": 616000 }, { "epoch": 0.66, "learning_rate": 1.151768884442221e-05, "loss": 0.8477, "step": 616500 }, { "epoch": 0.66, "learning_rate": 1.1502711355677838e-05, "loss": 0.8313, "step": 617000 }, { "epoch": 0.66, "learning_rate": 1.1487703851925964e-05, "loss": 0.8423, "step": 617500 }, { "epoch": 0.66, "learning_rate": 1.1472696348174087e-05, "loss": 0.8389, "step": 618000 }, { "epoch": 0.66, "learning_rate": 1.1457688844422211e-05, "loss": 0.8361, "step": 618500 }, { "epoch": 0.66, "learning_rate": 1.1442711355677838e-05, "loss": 0.847, "step": 619000 }, { "epoch": 0.66, "learning_rate": 1.1427703851925964e-05, "loss": 0.8392, "step": 619500 }, { "epoch": 0.66, "learning_rate": 1.1412696348174086e-05, "loss": 0.828, "step": 620000 }, { "epoch": 0.66, "eval_loss": 0.7836601734161377, "eval_runtime": 599.8827, "eval_samples_per_second": 166.699, "eval_steps_per_second": 41.675, "step": 620000 }, { "epoch": 0.67, "learning_rate": 1.1397688844422212e-05, "loss": 0.8334, "step": 620500 }, { "epoch": 0.67, "learning_rate": 1.1382681340670336e-05, "loss": 0.8389, "step": 621000 }, { "epoch": 0.67, "learning_rate": 1.136767383691846e-05, "loss": 0.8306, "step": 621500 }, { "epoch": 0.67, "learning_rate": 1.1352666333166584e-05, "loss": 0.8488, "step": 622000 }, { "epoch": 0.67, "learning_rate": 1.1337658829414708e-05, "loss": 0.8244, "step": 622500 }, { "epoch": 0.67, "learning_rate": 1.1322681340670335e-05, "loss": 0.8449, "step": 623000 }, { "epoch": 0.67, "learning_rate": 1.1307673836918459e-05, "loss": 0.8417, "step": 623500 }, { "epoch": 0.67, "learning_rate": 1.1292666333166584e-05, "loss": 0.8319, "step": 624000 }, { "epoch": 0.67, "learning_rate": 1.1277658829414708e-05, "loss": 0.8309, "step": 624500 }, { "epoch": 0.67, "learning_rate": 1.126265132566283e-05, "loss": 0.8326, "step": 625000 }, { "epoch": 0.67, "learning_rate": 1.124767383691846e-05, "loss": 0.8243, "step": 625500 }, { "epoch": 0.67, "learning_rate": 1.1232696348174086e-05, "loss": 0.834, "step": 626000 }, { "epoch": 0.67, "learning_rate": 1.1217688844422212e-05, "loss": 0.8355, "step": 626500 }, { "epoch": 0.67, "learning_rate": 1.1202711355677839e-05, "loss": 0.8326, "step": 627000 }, { "epoch": 0.67, "learning_rate": 1.1187703851925963e-05, "loss": 0.8316, "step": 627500 }, { "epoch": 0.67, "learning_rate": 1.1172696348174087e-05, "loss": 0.8426, "step": 628000 }, { "epoch": 0.67, "learning_rate": 1.1157688844422213e-05, "loss": 0.8407, "step": 628500 }, { "epoch": 0.67, "learning_rate": 1.1142681340670335e-05, "loss": 0.8292, "step": 629000 }, { "epoch": 0.68, "learning_rate": 1.1127673836918459e-05, "loss": 0.8412, "step": 629500 }, { "epoch": 0.68, "learning_rate": 1.1112666333166584e-05, "loss": 0.8378, "step": 630000 }, { "epoch": 0.68, "eval_loss": 0.7810372114181519, "eval_runtime": 608.8449, "eval_samples_per_second": 164.245, "eval_steps_per_second": 41.061, "step": 630000 }, { "epoch": 0.68, "learning_rate": 1.1097658829414707e-05, "loss": 0.832, "step": 630500 }, { "epoch": 0.68, "learning_rate": 1.108265132566283e-05, "loss": 0.8393, "step": 631000 }, { "epoch": 0.68, "learning_rate": 1.106767383691846e-05, "loss": 0.8387, "step": 631500 }, { "epoch": 0.68, "learning_rate": 1.1052666333166583e-05, "loss": 0.8366, "step": 632000 }, { "epoch": 0.68, "learning_rate": 1.1037688844422212e-05, "loss": 0.8145, "step": 632500 }, { "epoch": 0.68, "learning_rate": 1.1022681340670336e-05, "loss": 0.8275, "step": 633000 }, { "epoch": 0.68, "learning_rate": 1.1007673836918458e-05, "loss": 0.8372, "step": 633500 }, { "epoch": 0.68, "learning_rate": 1.0992666333166584e-05, "loss": 0.8224, "step": 634000 }, { "epoch": 0.68, "learning_rate": 1.0977658829414708e-05, "loss": 0.8247, "step": 634500 }, { "epoch": 0.68, "learning_rate": 1.0962651325662832e-05, "loss": 0.8397, "step": 635000 }, { "epoch": 0.68, "learning_rate": 1.0947643821910956e-05, "loss": 0.8302, "step": 635500 }, { "epoch": 0.68, "learning_rate": 1.0932666333166584e-05, "loss": 0.8334, "step": 636000 }, { "epoch": 0.68, "learning_rate": 1.0917658829414707e-05, "loss": 0.8377, "step": 636500 }, { "epoch": 0.68, "learning_rate": 1.0902651325662832e-05, "loss": 0.816, "step": 637000 }, { "epoch": 0.68, "learning_rate": 1.0887643821910956e-05, "loss": 0.8293, "step": 637500 }, { "epoch": 0.68, "learning_rate": 1.087263631815908e-05, "loss": 0.8414, "step": 638000 }, { "epoch": 0.68, "learning_rate": 1.0857658829414707e-05, "loss": 0.8348, "step": 638500 }, { "epoch": 0.69, "learning_rate": 1.0842651325662833e-05, "loss": 0.8368, "step": 639000 }, { "epoch": 0.69, "learning_rate": 1.0827643821910955e-05, "loss": 0.8547, "step": 639500 }, { "epoch": 0.69, "learning_rate": 1.0812636318159079e-05, "loss": 0.842, "step": 640000 }, { "epoch": 0.69, "eval_loss": 0.7787752151489258, "eval_runtime": 597.0129, "eval_samples_per_second": 167.501, "eval_steps_per_second": 41.875, "step": 640000 }, { "epoch": 0.69, "learning_rate": 1.0797628814407205e-05, "loss": 0.8327, "step": 640500 }, { "epoch": 0.69, "learning_rate": 1.0782621310655329e-05, "loss": 0.8286, "step": 641000 }, { "epoch": 0.69, "learning_rate": 1.076761380690345e-05, "loss": 0.8293, "step": 641500 }, { "epoch": 0.69, "learning_rate": 1.0752606303151576e-05, "loss": 0.8377, "step": 642000 }, { "epoch": 0.69, "learning_rate": 1.07375987993997e-05, "loss": 0.8163, "step": 642500 }, { "epoch": 0.69, "learning_rate": 1.0722591295647823e-05, "loss": 0.8428, "step": 643000 }, { "epoch": 0.69, "learning_rate": 1.0707583791895948e-05, "loss": 0.8245, "step": 643500 }, { "epoch": 0.69, "learning_rate": 1.0692576288144072e-05, "loss": 0.8317, "step": 644000 }, { "epoch": 0.69, "learning_rate": 1.06775987993997e-05, "loss": 0.8223, "step": 644500 }, { "epoch": 0.69, "learning_rate": 1.0662591295647825e-05, "loss": 0.8298, "step": 645000 }, { "epoch": 0.69, "learning_rate": 1.0647583791895949e-05, "loss": 0.8304, "step": 645500 }, { "epoch": 0.69, "learning_rate": 1.0632576288144073e-05, "loss": 0.8417, "step": 646000 }, { "epoch": 0.69, "learning_rate": 1.06175987993997e-05, "loss": 0.8283, "step": 646500 }, { "epoch": 0.69, "learning_rate": 1.0602591295647825e-05, "loss": 0.8296, "step": 647000 }, { "epoch": 0.69, "learning_rate": 1.0587583791895948e-05, "loss": 0.825, "step": 647500 }, { "epoch": 0.69, "learning_rate": 1.0572576288144072e-05, "loss": 0.8312, "step": 648000 }, { "epoch": 0.7, "learning_rate": 1.0557568784392197e-05, "loss": 0.8393, "step": 648500 }, { "epoch": 0.7, "learning_rate": 1.0542561280640321e-05, "loss": 0.838, "step": 649000 }, { "epoch": 0.7, "learning_rate": 1.0527553776888443e-05, "loss": 0.8271, "step": 649500 }, { "epoch": 0.7, "learning_rate": 1.0512576288144072e-05, "loss": 0.8348, "step": 650000 }, { "epoch": 0.7, "eval_loss": 0.7790135145187378, "eval_runtime": 606.4656, "eval_samples_per_second": 164.89, "eval_steps_per_second": 41.222, "step": 650000 }, { "epoch": 0.7, "learning_rate": 1.0497568784392196e-05, "loss": 0.8234, "step": 650500 }, { "epoch": 0.7, "learning_rate": 1.048256128064032e-05, "loss": 0.8434, "step": 651000 }, { "epoch": 0.7, "learning_rate": 1.0467553776888444e-05, "loss": 0.8387, "step": 651500 }, { "epoch": 0.7, "learning_rate": 1.045254627313657e-05, "loss": 0.8255, "step": 652000 }, { "epoch": 0.7, "learning_rate": 1.0437538769384692e-05, "loss": 0.8211, "step": 652500 }, { "epoch": 0.7, "learning_rate": 1.0422531265632816e-05, "loss": 0.8452, "step": 653000 }, { "epoch": 0.7, "learning_rate": 1.0407553776888445e-05, "loss": 0.8278, "step": 653500 }, { "epoch": 0.7, "learning_rate": 1.0392546273136568e-05, "loss": 0.8275, "step": 654000 }, { "epoch": 0.7, "learning_rate": 1.0377538769384692e-05, "loss": 0.8328, "step": 654500 }, { "epoch": 0.7, "learning_rate": 1.0362531265632818e-05, "loss": 0.8127, "step": 655000 }, { "epoch": 0.7, "learning_rate": 1.034752376188094e-05, "loss": 0.8171, "step": 655500 }, { "epoch": 0.7, "learning_rate": 1.0332516258129064e-05, "loss": 0.8297, "step": 656000 }, { "epoch": 0.7, "learning_rate": 1.031750875437719e-05, "loss": 0.8299, "step": 656500 }, { "epoch": 0.7, "learning_rate": 1.0302501250625314e-05, "loss": 0.8343, "step": 657000 }, { "epoch": 0.71, "learning_rate": 1.028752376188094e-05, "loss": 0.8344, "step": 657500 }, { "epoch": 0.71, "learning_rate": 1.027254627313657e-05, "loss": 0.8232, "step": 658000 }, { "epoch": 0.71, "learning_rate": 1.0257538769384692e-05, "loss": 0.83, "step": 658500 }, { "epoch": 0.71, "learning_rate": 1.0242531265632817e-05, "loss": 0.8321, "step": 659000 }, { "epoch": 0.71, "learning_rate": 1.0227523761880941e-05, "loss": 0.8327, "step": 659500 }, { "epoch": 0.71, "learning_rate": 1.0212516258129064e-05, "loss": 0.8217, "step": 660000 }, { "epoch": 0.71, "eval_loss": 0.7772210836410522, "eval_runtime": 623.462, "eval_samples_per_second": 160.395, "eval_steps_per_second": 40.099, "step": 660000 }, { "epoch": 0.71, "learning_rate": 1.019750875437719e-05, "loss": 0.8316, "step": 660500 }, { "epoch": 0.71, "learning_rate": 1.0182501250625313e-05, "loss": 0.8346, "step": 661000 }, { "epoch": 0.71, "learning_rate": 1.0167493746873437e-05, "loss": 0.8353, "step": 661500 }, { "epoch": 0.71, "learning_rate": 1.0152516258129064e-05, "loss": 0.8295, "step": 662000 }, { "epoch": 0.71, "learning_rate": 1.013750875437719e-05, "loss": 0.8361, "step": 662500 }, { "epoch": 0.71, "learning_rate": 1.0122501250625312e-05, "loss": 0.8363, "step": 663000 }, { "epoch": 0.71, "learning_rate": 1.0107493746873436e-05, "loss": 0.8393, "step": 663500 }, { "epoch": 0.71, "learning_rate": 1.0092516258129065e-05, "loss": 0.837, "step": 664000 }, { "epoch": 0.71, "learning_rate": 1.0077538769384692e-05, "loss": 0.8273, "step": 664500 }, { "epoch": 0.71, "learning_rate": 1.0062531265632817e-05, "loss": 0.8308, "step": 665000 }, { "epoch": 0.71, "learning_rate": 1.0047523761880941e-05, "loss": 0.8322, "step": 665500 }, { "epoch": 0.71, "learning_rate": 1.0032516258129064e-05, "loss": 0.8109, "step": 666000 }, { "epoch": 0.71, "learning_rate": 1.0017538769384692e-05, "loss": 0.8194, "step": 666500 }, { "epoch": 0.72, "learning_rate": 1.0002531265632816e-05, "loss": 0.8341, "step": 667000 }, { "epoch": 0.72, "learning_rate": 9.98752376188094e-06, "loss": 0.8371, "step": 667500 }, { "epoch": 0.72, "learning_rate": 9.972516258129064e-06, "loss": 0.8252, "step": 668000 }, { "epoch": 0.72, "learning_rate": 9.95750875437719e-06, "loss": 0.8276, "step": 668500 }, { "epoch": 0.72, "learning_rate": 9.942501250625312e-06, "loss": 0.8333, "step": 669000 }, { "epoch": 0.72, "learning_rate": 9.927493746873436e-06, "loss": 0.8287, "step": 669500 }, { "epoch": 0.72, "learning_rate": 9.912516258129065e-06, "loss": 0.8309, "step": 670000 }, { "epoch": 0.72, "eval_loss": 0.7751027941703796, "eval_runtime": 657.5511, "eval_samples_per_second": 152.079, "eval_steps_per_second": 38.02, "step": 670000 }, { "epoch": 0.72, "learning_rate": 9.897508754377189e-06, "loss": 0.8265, "step": 670500 }, { "epoch": 0.72, "learning_rate": 9.882501250625313e-06, "loss": 0.8296, "step": 671000 }, { "epoch": 0.72, "learning_rate": 9.867493746873438e-06, "loss": 0.8207, "step": 671500 }, { "epoch": 0.72, "learning_rate": 9.85248624312156e-06, "loss": 0.8193, "step": 672000 }, { "epoch": 0.72, "learning_rate": 9.837478739369684e-06, "loss": 0.8323, "step": 672500 }, { "epoch": 0.72, "learning_rate": 9.822501250625313e-06, "loss": 0.8389, "step": 673000 }, { "epoch": 0.72, "learning_rate": 9.807493746873437e-06, "loss": 0.8307, "step": 673500 }, { "epoch": 0.72, "learning_rate": 9.792486243121561e-06, "loss": 0.8229, "step": 674000 }, { "epoch": 0.72, "learning_rate": 9.777478739369685e-06, "loss": 0.8343, "step": 674500 }, { "epoch": 0.72, "learning_rate": 9.762471235617809e-06, "loss": 0.8317, "step": 675000 }, { "epoch": 0.72, "learning_rate": 9.747493746873438e-06, "loss": 0.8269, "step": 675500 }, { "epoch": 0.72, "learning_rate": 9.732486243121562e-06, "loss": 0.8238, "step": 676000 }, { "epoch": 0.73, "learning_rate": 9.717478739369684e-06, "loss": 0.8096, "step": 676500 }, { "epoch": 0.73, "learning_rate": 9.70247123561781e-06, "loss": 0.8251, "step": 677000 }, { "epoch": 0.73, "learning_rate": 9.687463731865933e-06, "loss": 0.8358, "step": 677500 }, { "epoch": 0.73, "learning_rate": 9.672456228114057e-06, "loss": 0.8155, "step": 678000 }, { "epoch": 0.73, "learning_rate": 9.657448724362181e-06, "loss": 0.8232, "step": 678500 }, { "epoch": 0.73, "learning_rate": 9.642441220610305e-06, "loss": 0.8185, "step": 679000 }, { "epoch": 0.73, "learning_rate": 9.627463731865932e-06, "loss": 0.8348, "step": 679500 }, { "epoch": 0.73, "learning_rate": 9.612456228114056e-06, "loss": 0.8094, "step": 680000 }, { "epoch": 0.73, "eval_loss": 0.7731601595878601, "eval_runtime": 620.6555, "eval_samples_per_second": 161.12, "eval_steps_per_second": 40.28, "step": 680000 }, { "epoch": 0.73, "learning_rate": 9.597448724362182e-06, "loss": 0.8311, "step": 680500 }, { "epoch": 0.73, "learning_rate": 9.582441220610306e-06, "loss": 0.818, "step": 681000 }, { "epoch": 0.73, "learning_rate": 9.56743371685843e-06, "loss": 0.8162, "step": 681500 }, { "epoch": 0.73, "learning_rate": 9.552426213106554e-06, "loss": 0.8283, "step": 682000 }, { "epoch": 0.73, "learning_rate": 9.537418709354678e-06, "loss": 0.8117, "step": 682500 }, { "epoch": 0.73, "learning_rate": 9.522441220610305e-06, "loss": 0.8371, "step": 683000 }, { "epoch": 0.73, "learning_rate": 9.50743371685843e-06, "loss": 0.8156, "step": 683500 }, { "epoch": 0.73, "learning_rate": 9.492426213106554e-06, "loss": 0.828, "step": 684000 }, { "epoch": 0.73, "learning_rate": 9.477418709354677e-06, "loss": 0.8315, "step": 684500 }, { "epoch": 0.73, "learning_rate": 9.462411205602802e-06, "loss": 0.8242, "step": 685000 }, { "epoch": 0.74, "learning_rate": 9.447403701850926e-06, "loss": 0.8334, "step": 685500 }, { "epoch": 0.74, "learning_rate": 9.43239619809905e-06, "loss": 0.8207, "step": 686000 }, { "epoch": 0.74, "learning_rate": 9.417388694347174e-06, "loss": 0.8318, "step": 686500 }, { "epoch": 0.74, "learning_rate": 9.402411205602803e-06, "loss": 0.8288, "step": 687000 }, { "epoch": 0.74, "learning_rate": 9.38743371685843e-06, "loss": 0.8319, "step": 687500 }, { "epoch": 0.74, "learning_rate": 9.372426213106554e-06, "loss": 0.8174, "step": 688000 }, { "epoch": 0.74, "learning_rate": 9.357418709354678e-06, "loss": 0.8295, "step": 688500 }, { "epoch": 0.74, "learning_rate": 9.342411205602802e-06, "loss": 0.8265, "step": 689000 }, { "epoch": 0.74, "learning_rate": 9.327403701850926e-06, "loss": 0.8263, "step": 689500 }, { "epoch": 0.74, "learning_rate": 9.31239619809905e-06, "loss": 0.826, "step": 690000 }, { "epoch": 0.74, "eval_loss": 0.7714752554893494, "eval_runtime": 616.7754, "eval_samples_per_second": 162.134, "eval_steps_per_second": 40.533, "step": 690000 }, { "epoch": 0.74, "learning_rate": 9.297418709354677e-06, "loss": 0.8202, "step": 690500 }, { "epoch": 0.74, "learning_rate": 9.282411205602802e-06, "loss": 0.8123, "step": 691000 }, { "epoch": 0.74, "learning_rate": 9.267403701850926e-06, "loss": 0.8179, "step": 691500 }, { "epoch": 0.74, "learning_rate": 9.252396198099048e-06, "loss": 0.8245, "step": 692000 }, { "epoch": 0.74, "learning_rate": 9.237388694347174e-06, "loss": 0.8257, "step": 692500 }, { "epoch": 0.74, "learning_rate": 9.222381190595298e-06, "loss": 0.8111, "step": 693000 }, { "epoch": 0.74, "learning_rate": 9.207403701850925e-06, "loss": 0.821, "step": 693500 }, { "epoch": 0.74, "learning_rate": 9.192396198099049e-06, "loss": 0.8179, "step": 694000 }, { "epoch": 0.74, "learning_rate": 9.177388694347175e-06, "loss": 0.8151, "step": 694500 }, { "epoch": 0.75, "learning_rate": 9.162381190595297e-06, "loss": 0.8411, "step": 695000 }, { "epoch": 0.75, "learning_rate": 9.147403701850926e-06, "loss": 0.8441, "step": 695500 }, { "epoch": 0.75, "learning_rate": 9.132396198099051e-06, "loss": 0.8214, "step": 696000 }, { "epoch": 0.75, "learning_rate": 9.117388694347173e-06, "loss": 0.8247, "step": 696500 }, { "epoch": 0.75, "learning_rate": 9.102381190595297e-06, "loss": 0.8196, "step": 697000 }, { "epoch": 0.75, "learning_rate": 9.087373686843423e-06, "loss": 0.8204, "step": 697500 }, { "epoch": 0.75, "learning_rate": 9.072366183091547e-06, "loss": 0.8126, "step": 698000 }, { "epoch": 0.75, "learning_rate": 9.057388694347174e-06, "loss": 0.8344, "step": 698500 }, { "epoch": 0.75, "learning_rate": 9.042381190595298e-06, "loss": 0.825, "step": 699000 }, { "epoch": 0.75, "learning_rate": 9.027373686843422e-06, "loss": 0.8234, "step": 699500 }, { "epoch": 0.75, "learning_rate": 9.01239619809905e-06, "loss": 0.8289, "step": 700000 }, { "epoch": 0.75, "eval_loss": 0.7685481905937195, "eval_runtime": 617.0203, "eval_samples_per_second": 162.069, "eval_steps_per_second": 40.517, "step": 700000 }, { "epoch": 0.75, "learning_rate": 8.997388694347175e-06, "loss": 0.8284, "step": 700500 }, { "epoch": 0.75, "learning_rate": 8.982381190595297e-06, "loss": 0.8129, "step": 701000 }, { "epoch": 0.75, "learning_rate": 8.967373686843422e-06, "loss": 0.8313, "step": 701500 }, { "epoch": 0.75, "learning_rate": 8.952366183091546e-06, "loss": 0.813, "step": 702000 }, { "epoch": 0.75, "learning_rate": 8.93735867933967e-06, "loss": 0.8221, "step": 702500 }, { "epoch": 0.75, "learning_rate": 8.922351175587794e-06, "loss": 0.8273, "step": 703000 }, { "epoch": 0.75, "learning_rate": 8.907343671835918e-06, "loss": 0.8329, "step": 703500 }, { "epoch": 0.75, "learning_rate": 8.892366183091545e-06, "loss": 0.8303, "step": 704000 }, { "epoch": 0.76, "learning_rate": 8.87735867933967e-06, "loss": 0.8078, "step": 704500 }, { "epoch": 0.76, "learning_rate": 8.862381190595298e-06, "loss": 0.8184, "step": 705000 }, { "epoch": 0.76, "learning_rate": 8.847373686843422e-06, "loss": 0.8168, "step": 705500 }, { "epoch": 0.76, "learning_rate": 8.832366183091546e-06, "loss": 0.8302, "step": 706000 }, { "epoch": 0.76, "learning_rate": 8.817388694347175e-06, "loss": 0.8239, "step": 706500 }, { "epoch": 0.76, "learning_rate": 8.802381190595297e-06, "loss": 0.8246, "step": 707000 }, { "epoch": 0.76, "learning_rate": 8.787373686843422e-06, "loss": 0.8196, "step": 707500 }, { "epoch": 0.76, "learning_rate": 8.772366183091546e-06, "loss": 0.8266, "step": 708000 }, { "epoch": 0.76, "learning_rate": 8.757358679339669e-06, "loss": 0.8321, "step": 708500 }, { "epoch": 0.76, "learning_rate": 8.742351175587794e-06, "loss": 0.8268, "step": 709000 }, { "epoch": 0.76, "learning_rate": 8.727343671835918e-06, "loss": 0.8253, "step": 709500 }, { "epoch": 0.76, "learning_rate": 8.712366183091545e-06, "loss": 0.8222, "step": 710000 }, { "epoch": 0.76, "eval_loss": 0.7680653929710388, "eval_runtime": 586.5219, "eval_samples_per_second": 170.497, "eval_steps_per_second": 42.624, "step": 710000 }, { "epoch": 0.76, "learning_rate": 8.697358679339671e-06, "loss": 0.8269, "step": 710500 }, { "epoch": 0.76, "learning_rate": 8.682351175587795e-06, "loss": 0.8123, "step": 711000 }, { "epoch": 0.76, "learning_rate": 8.667343671835917e-06, "loss": 0.8246, "step": 711500 }, { "epoch": 0.76, "learning_rate": 8.652336168084043e-06, "loss": 0.808, "step": 712000 }, { "epoch": 0.76, "learning_rate": 8.637328664332167e-06, "loss": 0.8297, "step": 712500 }, { "epoch": 0.76, "learning_rate": 8.62232116058029e-06, "loss": 0.8164, "step": 713000 }, { "epoch": 0.77, "learning_rate": 8.607313656828414e-06, "loss": 0.8028, "step": 713500 }, { "epoch": 0.77, "learning_rate": 8.592306153076538e-06, "loss": 0.8048, "step": 714000 }, { "epoch": 0.77, "learning_rate": 8.577298649324662e-06, "loss": 0.8233, "step": 714500 }, { "epoch": 0.77, "learning_rate": 8.562291145572786e-06, "loss": 0.8238, "step": 715000 }, { "epoch": 0.77, "learning_rate": 8.54728364182091e-06, "loss": 0.8322, "step": 715500 }, { "epoch": 0.77, "learning_rate": 8.532276138069034e-06, "loss": 0.8237, "step": 716000 }, { "epoch": 0.77, "learning_rate": 8.51726863431716e-06, "loss": 0.8338, "step": 716500 }, { "epoch": 0.77, "learning_rate": 8.502261130565282e-06, "loss": 0.8221, "step": 717000 }, { "epoch": 0.77, "learning_rate": 8.487253626813408e-06, "loss": 0.809, "step": 717500 }, { "epoch": 0.77, "learning_rate": 8.472276138069036e-06, "loss": 0.8317, "step": 718000 }, { "epoch": 0.77, "learning_rate": 8.457268634317159e-06, "loss": 0.8149, "step": 718500 }, { "epoch": 0.77, "learning_rate": 8.442261130565283e-06, "loss": 0.8168, "step": 719000 }, { "epoch": 0.77, "learning_rate": 8.427253626813408e-06, "loss": 0.8143, "step": 719500 }, { "epoch": 0.77, "learning_rate": 8.41224612306153e-06, "loss": 0.8208, "step": 720000 }, { "epoch": 0.77, "eval_loss": 0.7673591375350952, "eval_runtime": 614.272, "eval_samples_per_second": 162.794, "eval_steps_per_second": 40.699, "step": 720000 }, { "epoch": 0.77, "learning_rate": 8.397238619309654e-06, "loss": 0.825, "step": 720500 }, { "epoch": 0.77, "learning_rate": 8.38223111555778e-06, "loss": 0.8217, "step": 721000 }, { "epoch": 0.77, "learning_rate": 8.367223611805904e-06, "loss": 0.8191, "step": 721500 }, { "epoch": 0.77, "learning_rate": 8.352246123061531e-06, "loss": 0.8185, "step": 722000 }, { "epoch": 0.77, "learning_rate": 8.337238619309655e-06, "loss": 0.8198, "step": 722500 }, { "epoch": 0.78, "learning_rate": 8.322231115557779e-06, "loss": 0.8174, "step": 723000 }, { "epoch": 0.78, "learning_rate": 8.307223611805903e-06, "loss": 0.8028, "step": 723500 }, { "epoch": 0.78, "learning_rate": 8.292246123061532e-06, "loss": 0.8118, "step": 724000 }, { "epoch": 0.78, "learning_rate": 8.277238619309654e-06, "loss": 0.8277, "step": 724500 }, { "epoch": 0.78, "learning_rate": 8.262261130565283e-06, "loss": 0.8261, "step": 725000 }, { "epoch": 0.78, "learning_rate": 8.247253626813408e-06, "loss": 0.8166, "step": 725500 }, { "epoch": 0.78, "learning_rate": 8.23224612306153e-06, "loss": 0.8226, "step": 726000 }, { "epoch": 0.78, "learning_rate": 8.217238619309654e-06, "loss": 0.8205, "step": 726500 }, { "epoch": 0.78, "learning_rate": 8.20223111555778e-06, "loss": 0.8266, "step": 727000 }, { "epoch": 0.78, "learning_rate": 8.187223611805902e-06, "loss": 0.8173, "step": 727500 }, { "epoch": 0.78, "learning_rate": 8.172246123061531e-06, "loss": 0.8102, "step": 728000 }, { "epoch": 0.78, "learning_rate": 8.157238619309655e-06, "loss": 0.8065, "step": 728500 }, { "epoch": 0.78, "learning_rate": 8.142231115557779e-06, "loss": 0.8224, "step": 729000 }, { "epoch": 0.78, "learning_rate": 8.127223611805903e-06, "loss": 0.8121, "step": 729500 }, { "epoch": 0.78, "learning_rate": 8.112216108054028e-06, "loss": 0.8068, "step": 730000 }, { "epoch": 0.78, "eval_loss": 0.7651957869529724, "eval_runtime": 605.1423, "eval_samples_per_second": 165.25, "eval_steps_per_second": 41.313, "step": 730000 }, { "epoch": 0.78, "learning_rate": 8.09720860430215e-06, "loss": 0.8107, "step": 730500 }, { "epoch": 0.78, "learning_rate": 8.082201100550275e-06, "loss": 0.82, "step": 731000 }, { "epoch": 0.78, "learning_rate": 8.0671935967984e-06, "loss": 0.8167, "step": 731500 }, { "epoch": 0.78, "learning_rate": 8.052216108054027e-06, "loss": 0.8155, "step": 732000 }, { "epoch": 0.79, "learning_rate": 8.037208604302151e-06, "loss": 0.8169, "step": 732500 }, { "epoch": 0.79, "learning_rate": 8.022201100550275e-06, "loss": 0.8167, "step": 733000 }, { "epoch": 0.79, "learning_rate": 8.007223611805902e-06, "loss": 0.8225, "step": 733500 }, { "epoch": 0.79, "learning_rate": 7.992216108054028e-06, "loss": 0.8243, "step": 734000 }, { "epoch": 0.79, "learning_rate": 7.977208604302152e-06, "loss": 0.8146, "step": 734500 }, { "epoch": 0.79, "learning_rate": 7.962201100550274e-06, "loss": 0.8102, "step": 735000 }, { "epoch": 0.79, "learning_rate": 7.9471935967984e-06, "loss": 0.814, "step": 735500 }, { "epoch": 0.79, "learning_rate": 7.932186093046524e-06, "loss": 0.8221, "step": 736000 }, { "epoch": 0.79, "learning_rate": 7.917178589294648e-06, "loss": 0.8207, "step": 736500 }, { "epoch": 0.79, "learning_rate": 7.902171085542771e-06, "loss": 0.817, "step": 737000 }, { "epoch": 0.79, "learning_rate": 7.8871935967984e-06, "loss": 0.8043, "step": 737500 }, { "epoch": 0.79, "learning_rate": 7.872186093046524e-06, "loss": 0.8082, "step": 738000 }, { "epoch": 0.79, "learning_rate": 7.857178589294646e-06, "loss": 0.8119, "step": 738500 }, { "epoch": 0.79, "learning_rate": 7.842171085542772e-06, "loss": 0.8165, "step": 739000 }, { "epoch": 0.79, "learning_rate": 7.827193596798399e-06, "loss": 0.8235, "step": 739500 }, { "epoch": 0.79, "learning_rate": 7.812186093046523e-06, "loss": 0.8118, "step": 740000 }, { "epoch": 0.79, "eval_loss": 0.7625706195831299, "eval_runtime": 571.8371, "eval_samples_per_second": 174.875, "eval_steps_per_second": 43.719, "step": 740000 }, { "epoch": 0.79, "learning_rate": 7.797178589294647e-06, "loss": 0.8077, "step": 740500 }, { "epoch": 0.79, "learning_rate": 7.782171085542773e-06, "loss": 0.8171, "step": 741000 }, { "epoch": 0.8, "learning_rate": 7.7671935967984e-06, "loss": 0.807, "step": 741500 }, { "epoch": 0.8, "learning_rate": 7.752186093046524e-06, "loss": 0.8073, "step": 742000 }, { "epoch": 0.8, "learning_rate": 7.737178589294648e-06, "loss": 0.824, "step": 742500 }, { "epoch": 0.8, "learning_rate": 7.722171085542771e-06, "loss": 0.8159, "step": 743000 }, { "epoch": 0.8, "learning_rate": 7.7071935967984e-06, "loss": 0.8057, "step": 743500 }, { "epoch": 0.8, "learning_rate": 7.692216108054027e-06, "loss": 0.8145, "step": 744000 }, { "epoch": 0.8, "learning_rate": 7.677208604302151e-06, "loss": 0.8241, "step": 744500 }, { "epoch": 0.8, "learning_rate": 7.662201100550275e-06, "loss": 0.8223, "step": 745000 }, { "epoch": 0.8, "learning_rate": 7.6471935967984e-06, "loss": 0.8127, "step": 745500 }, { "epoch": 0.8, "learning_rate": 7.632186093046523e-06, "loss": 0.8108, "step": 746000 }, { "epoch": 0.8, "learning_rate": 7.617178589294648e-06, "loss": 0.8077, "step": 746500 }, { "epoch": 0.8, "learning_rate": 7.602201100550274e-06, "loss": 0.8255, "step": 747000 }, { "epoch": 0.8, "learning_rate": 7.5871935967984e-06, "loss": 0.8239, "step": 747500 }, { "epoch": 0.8, "learning_rate": 7.572216108054028e-06, "loss": 0.815, "step": 748000 }, { "epoch": 0.8, "learning_rate": 7.557208604302151e-06, "loss": 0.8117, "step": 748500 }, { "epoch": 0.8, "learning_rate": 7.542201100550276e-06, "loss": 0.8061, "step": 749000 }, { "epoch": 0.8, "learning_rate": 7.5271935967983995e-06, "loss": 0.8132, "step": 749500 }, { "epoch": 0.8, "learning_rate": 7.512186093046523e-06, "loss": 0.8134, "step": 750000 }, { "epoch": 0.8, "eval_loss": 0.761015772819519, "eval_runtime": 586.4258, "eval_samples_per_second": 170.525, "eval_steps_per_second": 42.631, "step": 750000 }, { "epoch": 0.8, "learning_rate": 7.497178589294647e-06, "loss": 0.8265, "step": 750500 }, { "epoch": 0.81, "learning_rate": 7.482171085542771e-06, "loss": 0.8161, "step": 751000 }, { "epoch": 0.81, "learning_rate": 7.467163581790896e-06, "loss": 0.8179, "step": 751500 }, { "epoch": 0.81, "learning_rate": 7.452156078039019e-06, "loss": 0.8219, "step": 752000 }, { "epoch": 0.81, "learning_rate": 7.437178589294647e-06, "loss": 0.8043, "step": 752500 }, { "epoch": 0.81, "learning_rate": 7.422171085542772e-06, "loss": 0.8199, "step": 753000 }, { "epoch": 0.81, "learning_rate": 7.407163581790896e-06, "loss": 0.809, "step": 753500 }, { "epoch": 0.81, "learning_rate": 7.39215607803902e-06, "loss": 0.8071, "step": 754000 }, { "epoch": 0.81, "learning_rate": 7.377148574287144e-06, "loss": 0.8114, "step": 754500 }, { "epoch": 0.81, "learning_rate": 7.3621710855427716e-06, "loss": 0.8021, "step": 755000 }, { "epoch": 0.81, "learning_rate": 7.3471635817908955e-06, "loss": 0.8117, "step": 755500 }, { "epoch": 0.81, "learning_rate": 7.33215607803902e-06, "loss": 0.8195, "step": 756000 }, { "epoch": 0.81, "learning_rate": 7.317148574287143e-06, "loss": 0.807, "step": 756500 }, { "epoch": 0.81, "learning_rate": 7.302141070535268e-06, "loss": 0.8047, "step": 757000 }, { "epoch": 0.81, "learning_rate": 7.287163581790896e-06, "loss": 0.8079, "step": 757500 }, { "epoch": 0.81, "learning_rate": 7.27215607803902e-06, "loss": 0.821, "step": 758000 }, { "epoch": 0.81, "learning_rate": 7.257148574287144e-06, "loss": 0.7959, "step": 758500 }, { "epoch": 0.81, "learning_rate": 7.242141070535268e-06, "loss": 0.8177, "step": 759000 }, { "epoch": 0.81, "learning_rate": 7.227163581790896e-06, "loss": 0.8155, "step": 759500 }, { "epoch": 0.81, "learning_rate": 7.21215607803902e-06, "loss": 0.8152, "step": 760000 }, { "epoch": 0.81, "eval_loss": 0.7589353919029236, "eval_runtime": 594.6054, "eval_samples_per_second": 168.179, "eval_steps_per_second": 42.045, "step": 760000 }, { "epoch": 0.82, "learning_rate": 7.197148574287144e-06, "loss": 0.8072, "step": 760500 }, { "epoch": 0.82, "learning_rate": 7.1821410705352676e-06, "loss": 0.8052, "step": 761000 }, { "epoch": 0.82, "learning_rate": 7.167133566783392e-06, "loss": 0.8271, "step": 761500 }, { "epoch": 0.82, "learning_rate": 7.152126063031516e-06, "loss": 0.8098, "step": 762000 }, { "epoch": 0.82, "learning_rate": 7.13711855927964e-06, "loss": 0.8081, "step": 762500 }, { "epoch": 0.82, "learning_rate": 7.122111055527764e-06, "loss": 0.8172, "step": 763000 }, { "epoch": 0.82, "learning_rate": 7.107103551775888e-06, "loss": 0.8152, "step": 763500 }, { "epoch": 0.82, "learning_rate": 7.092096048024012e-06, "loss": 0.8143, "step": 764000 }, { "epoch": 0.82, "learning_rate": 7.077088544272136e-06, "loss": 0.8038, "step": 764500 }, { "epoch": 0.82, "learning_rate": 7.062081040520261e-06, "loss": 0.8107, "step": 765000 }, { "epoch": 0.82, "learning_rate": 7.047103551775889e-06, "loss": 0.7959, "step": 765500 }, { "epoch": 0.82, "learning_rate": 7.032096048024012e-06, "loss": 0.8229, "step": 766000 }, { "epoch": 0.82, "learning_rate": 7.0170885442721365e-06, "loss": 0.8147, "step": 766500 }, { "epoch": 0.82, "learning_rate": 7.0020810405202604e-06, "loss": 0.8026, "step": 767000 }, { "epoch": 0.82, "learning_rate": 6.9871335667833915e-06, "loss": 0.8015, "step": 767500 }, { "epoch": 0.82, "learning_rate": 6.972126063031516e-06, "loss": 0.8041, "step": 768000 }, { "epoch": 0.82, "learning_rate": 6.957118559279639e-06, "loss": 0.8258, "step": 768500 }, { "epoch": 0.82, "learning_rate": 6.942111055527764e-06, "loss": 0.8184, "step": 769000 }, { "epoch": 0.83, "learning_rate": 6.927103551775888e-06, "loss": 0.8076, "step": 769500 }, { "epoch": 0.83, "learning_rate": 6.912096048024013e-06, "loss": 0.8106, "step": 770000 }, { "epoch": 0.83, "eval_loss": 0.7582979202270508, "eval_runtime": 581.7955, "eval_samples_per_second": 171.882, "eval_steps_per_second": 42.97, "step": 770000 }, { "epoch": 0.83, "learning_rate": 6.897088544272136e-06, "loss": 0.8126, "step": 770500 }, { "epoch": 0.83, "learning_rate": 6.882111055527764e-06, "loss": 0.8049, "step": 771000 }, { "epoch": 0.83, "learning_rate": 6.867103551775888e-06, "loss": 0.8076, "step": 771500 }, { "epoch": 0.83, "learning_rate": 6.852096048024013e-06, "loss": 0.8063, "step": 772000 }, { "epoch": 0.83, "learning_rate": 6.837088544272136e-06, "loss": 0.8151, "step": 772500 }, { "epoch": 0.83, "learning_rate": 6.8220810405202605e-06, "loss": 0.8293, "step": 773000 }, { "epoch": 0.83, "learning_rate": 6.807073536768384e-06, "loss": 0.8075, "step": 773500 }, { "epoch": 0.83, "learning_rate": 6.792096048024012e-06, "loss": 0.8077, "step": 774000 }, { "epoch": 0.83, "learning_rate": 6.777088544272136e-06, "loss": 0.8028, "step": 774500 }, { "epoch": 0.83, "learning_rate": 6.76208104052026e-06, "loss": 0.8217, "step": 775000 }, { "epoch": 0.83, "learning_rate": 6.747073536768384e-06, "loss": 0.8213, "step": 775500 }, { "epoch": 0.83, "learning_rate": 6.732066033016509e-06, "loss": 0.8068, "step": 776000 }, { "epoch": 0.83, "learning_rate": 6.717058529264632e-06, "loss": 0.802, "step": 776500 }, { "epoch": 0.83, "learning_rate": 6.702051025512757e-06, "loss": 0.8199, "step": 777000 }, { "epoch": 0.83, "learning_rate": 6.687043521760881e-06, "loss": 0.8121, "step": 777500 }, { "epoch": 0.83, "learning_rate": 6.672036018009005e-06, "loss": 0.8154, "step": 778000 }, { "epoch": 0.83, "learning_rate": 6.6570585292646326e-06, "loss": 0.804, "step": 778500 }, { "epoch": 0.84, "learning_rate": 6.64208104052026e-06, "loss": 0.8103, "step": 779000 }, { "epoch": 0.84, "learning_rate": 6.6270735367683844e-06, "loss": 0.7978, "step": 779500 }, { "epoch": 0.84, "learning_rate": 6.612096048024012e-06, "loss": 0.812, "step": 780000 }, { "epoch": 0.84, "eval_loss": 0.7558347582817078, "eval_runtime": 600.211, "eval_samples_per_second": 166.608, "eval_steps_per_second": 41.652, "step": 780000 }, { "epoch": 0.84, "learning_rate": 6.597088544272136e-06, "loss": 0.7998, "step": 780500 }, { "epoch": 0.84, "learning_rate": 6.58208104052026e-06, "loss": 0.8003, "step": 781000 }, { "epoch": 0.84, "learning_rate": 6.567073536768384e-06, "loss": 0.8138, "step": 781500 }, { "epoch": 0.84, "learning_rate": 6.552096048024012e-06, "loss": 0.7981, "step": 782000 }, { "epoch": 0.84, "learning_rate": 6.537088544272137e-06, "loss": 0.8065, "step": 782500 }, { "epoch": 0.84, "learning_rate": 6.52208104052026e-06, "loss": 0.7939, "step": 783000 }, { "epoch": 0.84, "learning_rate": 6.507073536768384e-06, "loss": 0.7971, "step": 783500 }, { "epoch": 0.84, "learning_rate": 6.492066033016509e-06, "loss": 0.8023, "step": 784000 }, { "epoch": 0.84, "learning_rate": 6.477058529264633e-06, "loss": 0.8124, "step": 784500 }, { "epoch": 0.84, "learning_rate": 6.4620510255127565e-06, "loss": 0.8006, "step": 785000 }, { "epoch": 0.84, "learning_rate": 6.4470435217608805e-06, "loss": 0.8008, "step": 785500 }, { "epoch": 0.84, "learning_rate": 6.432036018009005e-06, "loss": 0.8218, "step": 786000 }, { "epoch": 0.84, "learning_rate": 6.417028514257128e-06, "loss": 0.8166, "step": 786500 }, { "epoch": 0.84, "learning_rate": 6.402021010505252e-06, "loss": 0.7998, "step": 787000 }, { "epoch": 0.84, "learning_rate": 6.387013506753377e-06, "loss": 0.814, "step": 787500 }, { "epoch": 0.85, "learning_rate": 6.372006003001501e-06, "loss": 0.8006, "step": 788000 }, { "epoch": 0.85, "learning_rate": 6.356998499249625e-06, "loss": 0.8054, "step": 788500 }, { "epoch": 0.85, "learning_rate": 6.341990995497749e-06, "loss": 0.8008, "step": 789000 }, { "epoch": 0.85, "learning_rate": 6.326983491745874e-06, "loss": 0.8155, "step": 789500 }, { "epoch": 0.85, "learning_rate": 6.3120060030015015e-06, "loss": 0.8077, "step": 790000 }, { "epoch": 0.85, "eval_loss": 0.7543774843215942, "eval_runtime": 577.6914, "eval_samples_per_second": 173.103, "eval_steps_per_second": 43.276, "step": 790000 }, { "epoch": 0.85, "learning_rate": 6.296998499249625e-06, "loss": 0.7926, "step": 790500 }, { "epoch": 0.85, "learning_rate": 6.2819909954977485e-06, "loss": 0.8047, "step": 791000 }, { "epoch": 0.85, "learning_rate": 6.2670135067533765e-06, "loss": 0.808, "step": 791500 }, { "epoch": 0.85, "learning_rate": 6.252006003001501e-06, "loss": 0.7919, "step": 792000 }, { "epoch": 0.85, "learning_rate": 6.237028514257129e-06, "loss": 0.7999, "step": 792500 }, { "epoch": 0.85, "learning_rate": 6.222021010505252e-06, "loss": 0.806, "step": 793000 }, { "epoch": 0.85, "learning_rate": 6.207013506753377e-06, "loss": 0.7944, "step": 793500 }, { "epoch": 0.85, "learning_rate": 6.192006003001501e-06, "loss": 0.8031, "step": 794000 }, { "epoch": 0.85, "learning_rate": 6.177028514257129e-06, "loss": 0.8094, "step": 794500 }, { "epoch": 0.85, "learning_rate": 6.162021010505252e-06, "loss": 0.8119, "step": 795000 }, { "epoch": 0.85, "learning_rate": 6.147013506753377e-06, "loss": 0.8092, "step": 795500 }, { "epoch": 0.85, "learning_rate": 6.132006003001501e-06, "loss": 0.7965, "step": 796000 }, { "epoch": 0.85, "learning_rate": 6.1169984992496255e-06, "loss": 0.8127, "step": 796500 }, { "epoch": 0.85, "learning_rate": 6.1019909954977486e-06, "loss": 0.8076, "step": 797000 }, { "epoch": 0.86, "learning_rate": 6.086983491745873e-06, "loss": 0.8006, "step": 797500 }, { "epoch": 0.86, "learning_rate": 6.071975987993997e-06, "loss": 0.8039, "step": 798000 }, { "epoch": 0.86, "learning_rate": 6.056968484242122e-06, "loss": 0.8076, "step": 798500 }, { "epoch": 0.86, "learning_rate": 6.041960980490245e-06, "loss": 0.7912, "step": 799000 }, { "epoch": 0.86, "learning_rate": 6.026953476738369e-06, "loss": 0.8113, "step": 799500 }, { "epoch": 0.86, "learning_rate": 6.011945972986494e-06, "loss": 0.8112, "step": 800000 }, { "epoch": 0.86, "eval_loss": 0.7531115412712097, "eval_runtime": 659.7719, "eval_samples_per_second": 151.568, "eval_steps_per_second": 37.892, "step": 800000 }, { "epoch": 0.86, "learning_rate": 5.996968484242122e-06, "loss": 0.7955, "step": 800500 }, { "epoch": 0.86, "learning_rate": 5.981960980490245e-06, "loss": 0.8159, "step": 801000 }, { "epoch": 0.86, "learning_rate": 5.96695347673837e-06, "loss": 0.8022, "step": 801500 }, { "epoch": 0.86, "learning_rate": 5.9519459729864936e-06, "loss": 0.806, "step": 802000 }, { "epoch": 0.86, "learning_rate": 5.9369684842421215e-06, "loss": 0.806, "step": 802500 }, { "epoch": 0.86, "learning_rate": 5.921960980490245e-06, "loss": 0.8175, "step": 803000 }, { "epoch": 0.86, "learning_rate": 5.906953476738369e-06, "loss": 0.8015, "step": 803500 }, { "epoch": 0.86, "learning_rate": 5.891975987993997e-06, "loss": 0.8171, "step": 804000 }, { "epoch": 0.86, "learning_rate": 5.876968484242121e-06, "loss": 0.8022, "step": 804500 }, { "epoch": 0.86, "learning_rate": 5.861960980490245e-06, "loss": 0.81, "step": 805000 }, { "epoch": 0.86, "learning_rate": 5.846953476738369e-06, "loss": 0.7946, "step": 805500 }, { "epoch": 0.86, "learning_rate": 5.831945972986494e-06, "loss": 0.8044, "step": 806000 }, { "epoch": 0.86, "learning_rate": 5.816968484242122e-06, "loss": 0.7969, "step": 806500 }, { "epoch": 0.87, "learning_rate": 5.801960980490245e-06, "loss": 0.8136, "step": 807000 }, { "epoch": 0.87, "learning_rate": 5.786953476738369e-06, "loss": 0.814, "step": 807500 }, { "epoch": 0.87, "learning_rate": 5.771945972986494e-06, "loss": 0.7976, "step": 808000 }, { "epoch": 0.87, "learning_rate": 5.7569384692346175e-06, "loss": 0.793, "step": 808500 }, { "epoch": 0.87, "learning_rate": 5.7419309654827415e-06, "loss": 0.8028, "step": 809000 }, { "epoch": 0.87, "learning_rate": 5.726923461730865e-06, "loss": 0.8023, "step": 809500 }, { "epoch": 0.87, "learning_rate": 5.71191595797899e-06, "loss": 0.8162, "step": 810000 }, { "epoch": 0.87, "eval_loss": 0.7523924708366394, "eval_runtime": 648.5551, "eval_samples_per_second": 154.189, "eval_steps_per_second": 38.547, "step": 810000 }, { "epoch": 0.87, "learning_rate": 5.696938469234618e-06, "loss": 0.809, "step": 810500 }, { "epoch": 0.87, "learning_rate": 5.681930965482741e-06, "loss": 0.8036, "step": 811000 }, { "epoch": 0.87, "learning_rate": 5.666923461730865e-06, "loss": 0.8188, "step": 811500 }, { "epoch": 0.87, "learning_rate": 5.65191595797899e-06, "loss": 0.8053, "step": 812000 }, { "epoch": 0.87, "learning_rate": 5.636938469234618e-06, "loss": 0.802, "step": 812500 }, { "epoch": 0.87, "learning_rate": 5.621960980490246e-06, "loss": 0.8064, "step": 813000 }, { "epoch": 0.87, "learning_rate": 5.606953476738369e-06, "loss": 0.8014, "step": 813500 }, { "epoch": 0.87, "learning_rate": 5.591945972986494e-06, "loss": 0.8038, "step": 814000 }, { "epoch": 0.87, "learning_rate": 5.5769384692346176e-06, "loss": 0.8161, "step": 814500 }, { "epoch": 0.87, "learning_rate": 5.5619309654827415e-06, "loss": 0.7968, "step": 815000 }, { "epoch": 0.87, "learning_rate": 5.546953476738369e-06, "loss": 0.7984, "step": 815500 }, { "epoch": 0.88, "learning_rate": 5.531945972986493e-06, "loss": 0.8034, "step": 816000 }, { "epoch": 0.88, "learning_rate": 5.516938469234617e-06, "loss": 0.8058, "step": 816500 }, { "epoch": 0.88, "learning_rate": 5.501930965482742e-06, "loss": 0.8205, "step": 817000 }, { "epoch": 0.88, "learning_rate": 5.486923461730865e-06, "loss": 0.794, "step": 817500 }, { "epoch": 0.88, "learning_rate": 5.47191595797899e-06, "loss": 0.8056, "step": 818000 }, { "epoch": 0.88, "learning_rate": 5.456938469234618e-06, "loss": 0.7963, "step": 818500 }, { "epoch": 0.88, "learning_rate": 5.441930965482742e-06, "loss": 0.8053, "step": 819000 }, { "epoch": 0.88, "learning_rate": 5.426923461730865e-06, "loss": 0.8077, "step": 819500 }, { "epoch": 0.88, "learning_rate": 5.41191595797899e-06, "loss": 0.7988, "step": 820000 }, { "epoch": 0.88, "eval_loss": 0.7511362433433533, "eval_runtime": 622.036, "eval_samples_per_second": 160.762, "eval_steps_per_second": 40.191, "step": 820000 }, { "epoch": 0.88, "learning_rate": 5.396908454227114e-06, "loss": 0.8059, "step": 820500 }, { "epoch": 0.88, "learning_rate": 5.3819309654827415e-06, "loss": 0.8038, "step": 821000 }, { "epoch": 0.88, "learning_rate": 5.3669234617308654e-06, "loss": 0.7953, "step": 821500 }, { "epoch": 0.88, "learning_rate": 5.351915957978989e-06, "loss": 0.7935, "step": 822000 }, { "epoch": 0.88, "learning_rate": 5.336908454227114e-06, "loss": 0.8052, "step": 822500 }, { "epoch": 0.88, "learning_rate": 5.321900950475238e-06, "loss": 0.8097, "step": 823000 }, { "epoch": 0.88, "learning_rate": 5.306893446723361e-06, "loss": 0.808, "step": 823500 }, { "epoch": 0.88, "learning_rate": 5.291915957978989e-06, "loss": 0.7991, "step": 824000 }, { "epoch": 0.88, "learning_rate": 5.276908454227114e-06, "loss": 0.8031, "step": 824500 }, { "epoch": 0.88, "learning_rate": 5.261900950475238e-06, "loss": 0.8172, "step": 825000 }, { "epoch": 0.89, "learning_rate": 5.246893446723362e-06, "loss": 0.7948, "step": 825500 }, { "epoch": 0.89, "learning_rate": 5.231885942971486e-06, "loss": 0.8089, "step": 826000 }, { "epoch": 0.89, "learning_rate": 5.2168784392196105e-06, "loss": 0.7969, "step": 826500 }, { "epoch": 0.89, "learning_rate": 5.201870935467734e-06, "loss": 0.8, "step": 827000 }, { "epoch": 0.89, "learning_rate": 5.1868634317158575e-06, "loss": 0.7848, "step": 827500 }, { "epoch": 0.89, "learning_rate": 5.171885942971485e-06, "loss": 0.8025, "step": 828000 }, { "epoch": 0.89, "learning_rate": 5.15687843921961e-06, "loss": 0.7814, "step": 828500 }, { "epoch": 0.89, "learning_rate": 5.141900950475238e-06, "loss": 0.7938, "step": 829000 }, { "epoch": 0.89, "learning_rate": 5.126893446723362e-06, "loss": 0.8011, "step": 829500 }, { "epoch": 0.89, "learning_rate": 5.111885942971486e-06, "loss": 0.8038, "step": 830000 }, { "epoch": 0.89, "eval_loss": 0.7489431500434875, "eval_runtime": 627.3417, "eval_samples_per_second": 159.403, "eval_steps_per_second": 39.851, "step": 830000 }, { "epoch": 0.89, "learning_rate": 5.09687843921961e-06, "loss": 0.8, "step": 830500 }, { "epoch": 0.89, "learning_rate": 5.081870935467734e-06, "loss": 0.7896, "step": 831000 }, { "epoch": 0.89, "learning_rate": 5.066863431715858e-06, "loss": 0.81, "step": 831500 }, { "epoch": 0.89, "learning_rate": 5.051855927963982e-06, "loss": 0.8004, "step": 832000 }, { "epoch": 0.89, "learning_rate": 5.0368484242121065e-06, "loss": 0.8019, "step": 832500 }, { "epoch": 0.89, "learning_rate": 5.021870935467734e-06, "loss": 0.794, "step": 833000 }, { "epoch": 0.89, "learning_rate": 5.0068634317158575e-06, "loss": 0.8003, "step": 833500 }, { "epoch": 0.89, "learning_rate": 4.9918859429714854e-06, "loss": 0.808, "step": 834000 }, { "epoch": 0.89, "learning_rate": 4.97687843921961e-06, "loss": 0.7961, "step": 834500 }, { "epoch": 0.9, "learning_rate": 4.961870935467734e-06, "loss": 0.7936, "step": 835000 }, { "epoch": 0.9, "learning_rate": 4.946863431715858e-06, "loss": 0.8034, "step": 835500 }, { "epoch": 0.9, "learning_rate": 4.931855927963982e-06, "loss": 0.7837, "step": 836000 }, { "epoch": 0.9, "learning_rate": 4.916848424212106e-06, "loss": 0.791, "step": 836500 }, { "epoch": 0.9, "learning_rate": 4.901840920460231e-06, "loss": 0.8138, "step": 837000 }, { "epoch": 0.9, "learning_rate": 4.886833416708354e-06, "loss": 0.8031, "step": 837500 }, { "epoch": 0.9, "learning_rate": 4.871855927963982e-06, "loss": 0.7994, "step": 838000 }, { "epoch": 0.9, "learning_rate": 4.8568484242121065e-06, "loss": 0.7981, "step": 838500 }, { "epoch": 0.9, "learning_rate": 4.8418409204602304e-06, "loss": 0.7969, "step": 839000 }, { "epoch": 0.9, "learning_rate": 4.826833416708354e-06, "loss": 0.7991, "step": 839500 }, { "epoch": 0.9, "learning_rate": 4.8118559279639815e-06, "loss": 0.7978, "step": 840000 }, { "epoch": 0.9, "eval_loss": 0.7486736178398132, "eval_runtime": 614.7602, "eval_samples_per_second": 162.665, "eval_steps_per_second": 40.666, "step": 840000 }, { "epoch": 0.9, "learning_rate": 4.796848424212106e-06, "loss": 0.7944, "step": 840500 }, { "epoch": 0.9, "learning_rate": 4.78184092046023e-06, "loss": 0.7894, "step": 841000 }, { "epoch": 0.9, "learning_rate": 4.766833416708355e-06, "loss": 0.8049, "step": 841500 }, { "epoch": 0.9, "learning_rate": 4.751855927963982e-06, "loss": 0.7895, "step": 842000 }, { "epoch": 0.9, "learning_rate": 4.73687843921961e-06, "loss": 0.7994, "step": 842500 }, { "epoch": 0.9, "learning_rate": 4.721870935467734e-06, "loss": 0.7957, "step": 843000 }, { "epoch": 0.9, "learning_rate": 4.706863431715858e-06, "loss": 0.8035, "step": 843500 }, { "epoch": 0.91, "learning_rate": 4.691855927963983e-06, "loss": 0.8004, "step": 844000 }, { "epoch": 0.91, "learning_rate": 4.676848424212106e-06, "loss": 0.8055, "step": 844500 }, { "epoch": 0.91, "learning_rate": 4.661870935467734e-06, "loss": 0.7879, "step": 845000 }, { "epoch": 0.91, "learning_rate": 4.646863431715858e-06, "loss": 0.7818, "step": 845500 }, { "epoch": 0.91, "learning_rate": 4.631855927963982e-06, "loss": 0.7879, "step": 846000 }, { "epoch": 0.91, "learning_rate": 4.616848424212106e-06, "loss": 0.7961, "step": 846500 }, { "epoch": 0.91, "learning_rate": 4.60184092046023e-06, "loss": 0.7985, "step": 847000 }, { "epoch": 0.91, "learning_rate": 4.586833416708354e-06, "loss": 0.7991, "step": 847500 }, { "epoch": 0.91, "learning_rate": 4.571825912956478e-06, "loss": 0.7947, "step": 848000 }, { "epoch": 0.91, "learning_rate": 4.556818409204602e-06, "loss": 0.7986, "step": 848500 }, { "epoch": 0.91, "learning_rate": 4.541810905452727e-06, "loss": 0.7947, "step": 849000 }, { "epoch": 0.91, "learning_rate": 4.526863431715858e-06, "loss": 0.7932, "step": 849500 }, { "epoch": 0.91, "learning_rate": 4.511855927963983e-06, "loss": 0.7925, "step": 850000 }, { "epoch": 0.91, "eval_loss": 0.74729984998703, "eval_runtime": 633.8245, "eval_samples_per_second": 157.772, "eval_steps_per_second": 39.443, "step": 850000 }, { "epoch": 0.91, "learning_rate": 4.496848424212106e-06, "loss": 0.7857, "step": 850500 }, { "epoch": 0.91, "learning_rate": 4.481870935467734e-06, "loss": 0.7798, "step": 851000 }, { "epoch": 0.91, "learning_rate": 4.466863431715858e-06, "loss": 0.8151, "step": 851500 }, { "epoch": 0.91, "learning_rate": 4.451855927963982e-06, "loss": 0.805, "step": 852000 }, { "epoch": 0.91, "learning_rate": 4.436848424212106e-06, "loss": 0.8009, "step": 852500 }, { "epoch": 0.91, "learning_rate": 4.42184092046023e-06, "loss": 0.7944, "step": 853000 }, { "epoch": 0.92, "learning_rate": 4.406833416708354e-06, "loss": 0.8048, "step": 853500 }, { "epoch": 0.92, "learning_rate": 4.391825912956479e-06, "loss": 0.7969, "step": 854000 }, { "epoch": 0.92, "learning_rate": 4.376818409204602e-06, "loss": 0.8027, "step": 854500 }, { "epoch": 0.92, "learning_rate": 4.361810905452727e-06, "loss": 0.7892, "step": 855000 }, { "epoch": 0.92, "learning_rate": 4.346803401700851e-06, "loss": 0.7969, "step": 855500 }, { "epoch": 0.92, "learning_rate": 4.331795897948975e-06, "loss": 0.8099, "step": 856000 }, { "epoch": 0.92, "learning_rate": 4.316788394197099e-06, "loss": 0.8151, "step": 856500 }, { "epoch": 0.92, "learning_rate": 4.3018109054527265e-06, "loss": 0.7951, "step": 857000 }, { "epoch": 0.92, "learning_rate": 4.2868334167083545e-06, "loss": 0.7952, "step": 857500 }, { "epoch": 0.92, "learning_rate": 4.271825912956478e-06, "loss": 0.7942, "step": 858000 }, { "epoch": 0.92, "learning_rate": 4.256818409204603e-06, "loss": 0.7968, "step": 858500 }, { "epoch": 0.92, "learning_rate": 4.241810905452726e-06, "loss": 0.7929, "step": 859000 }, { "epoch": 0.92, "learning_rate": 4.22680340170085e-06, "loss": 0.8059, "step": 859500 }, { "epoch": 0.92, "learning_rate": 4.211825912956478e-06, "loss": 0.7919, "step": 860000 }, { "epoch": 0.92, "eval_loss": 0.7454802393913269, "eval_runtime": 606.3303, "eval_samples_per_second": 164.927, "eval_steps_per_second": 41.232, "step": 860000 }, { "epoch": 0.92, "learning_rate": 4.196818409204603e-06, "loss": 0.7904, "step": 860500 }, { "epoch": 0.92, "learning_rate": 4.181810905452726e-06, "loss": 0.8019, "step": 861000 }, { "epoch": 0.92, "learning_rate": 4.166803401700851e-06, "loss": 0.7888, "step": 861500 }, { "epoch": 0.92, "learning_rate": 4.151795897948975e-06, "loss": 0.7882, "step": 862000 }, { "epoch": 0.92, "learning_rate": 4.136788394197099e-06, "loss": 0.802, "step": 862500 }, { "epoch": 0.93, "learning_rate": 4.1217808904452225e-06, "loss": 0.798, "step": 863000 }, { "epoch": 0.93, "learning_rate": 4.1068034017008505e-06, "loss": 0.7931, "step": 863500 }, { "epoch": 0.93, "learning_rate": 4.091825912956478e-06, "loss": 0.8102, "step": 864000 }, { "epoch": 0.93, "learning_rate": 4.076818409204602e-06, "loss": 0.7989, "step": 864500 }, { "epoch": 0.93, "learning_rate": 4.061810905452726e-06, "loss": 0.7932, "step": 865000 }, { "epoch": 0.93, "learning_rate": 4.04680340170085e-06, "loss": 0.7873, "step": 865500 }, { "epoch": 0.93, "learning_rate": 4.031795897948975e-06, "loss": 0.7947, "step": 866000 }, { "epoch": 0.93, "learning_rate": 4.016788394197099e-06, "loss": 0.7919, "step": 866500 }, { "epoch": 0.93, "learning_rate": 4.001780890445223e-06, "loss": 0.7963, "step": 867000 }, { "epoch": 0.93, "learning_rate": 3.986773386693347e-06, "loss": 0.7956, "step": 867500 }, { "epoch": 0.93, "learning_rate": 3.971765882941471e-06, "loss": 0.7991, "step": 868000 }, { "epoch": 0.93, "learning_rate": 3.956758379189595e-06, "loss": 0.8067, "step": 868500 }, { "epoch": 0.93, "learning_rate": 3.9417508754377186e-06, "loss": 0.7864, "step": 869000 }, { "epoch": 0.93, "learning_rate": 3.926743371685843e-06, "loss": 0.7947, "step": 869500 }, { "epoch": 0.93, "learning_rate": 3.911765882941471e-06, "loss": 0.7883, "step": 870000 }, { "epoch": 0.93, "eval_loss": 0.743596076965332, "eval_runtime": 589.6574, "eval_samples_per_second": 169.59, "eval_steps_per_second": 42.398, "step": 870000 }, { "epoch": 0.93, "learning_rate": 3.896758379189595e-06, "loss": 0.7853, "step": 870500 }, { "epoch": 0.93, "learning_rate": 3.881780890445222e-06, "loss": 0.7879, "step": 871000 }, { "epoch": 0.93, "learning_rate": 3.866773386693346e-06, "loss": 0.797, "step": 871500 }, { "epoch": 0.94, "learning_rate": 3.851765882941471e-06, "loss": 0.8057, "step": 872000 }, { "epoch": 0.94, "learning_rate": 3.836758379189595e-06, "loss": 0.7998, "step": 872500 }, { "epoch": 0.94, "learning_rate": 3.821750875437719e-06, "loss": 0.8032, "step": 873000 }, { "epoch": 0.94, "learning_rate": 3.806743371685843e-06, "loss": 0.7969, "step": 873500 }, { "epoch": 0.94, "learning_rate": 3.791735867933967e-06, "loss": 0.8039, "step": 874000 }, { "epoch": 0.94, "learning_rate": 3.7767283641820915e-06, "loss": 0.7789, "step": 874500 }, { "epoch": 0.94, "learning_rate": 3.7617508754377186e-06, "loss": 0.7955, "step": 875000 }, { "epoch": 0.94, "learning_rate": 3.746743371685843e-06, "loss": 0.7855, "step": 875500 }, { "epoch": 0.94, "learning_rate": 3.7317358679339673e-06, "loss": 0.7946, "step": 876000 }, { "epoch": 0.94, "learning_rate": 3.7167283641820912e-06, "loss": 0.797, "step": 876500 }, { "epoch": 0.94, "learning_rate": 3.7017208604302156e-06, "loss": 0.7964, "step": 877000 }, { "epoch": 0.94, "learning_rate": 3.6867433716858427e-06, "loss": 0.7946, "step": 877500 }, { "epoch": 0.94, "learning_rate": 3.671735867933967e-06, "loss": 0.7962, "step": 878000 }, { "epoch": 0.94, "learning_rate": 3.656728364182091e-06, "loss": 0.7968, "step": 878500 }, { "epoch": 0.94, "learning_rate": 3.6417208604302153e-06, "loss": 0.7812, "step": 879000 }, { "epoch": 0.94, "learning_rate": 3.6267133566783392e-06, "loss": 0.797, "step": 879500 }, { "epoch": 0.94, "learning_rate": 3.6117058529264636e-06, "loss": 0.7819, "step": 880000 }, { "epoch": 0.94, "eval_loss": 0.7429205179214478, "eval_runtime": 604.5124, "eval_samples_per_second": 165.423, "eval_steps_per_second": 41.356, "step": 880000 }, { "epoch": 0.94, "learning_rate": 3.5966983491745875e-06, "loss": 0.7879, "step": 880500 }, { "epoch": 0.94, "learning_rate": 3.5816908454227115e-06, "loss": 0.7936, "step": 881000 }, { "epoch": 0.95, "learning_rate": 3.566713356678339e-06, "loss": 0.8068, "step": 881500 }, { "epoch": 0.95, "learning_rate": 3.5517058529264633e-06, "loss": 0.7924, "step": 882000 }, { "epoch": 0.95, "learning_rate": 3.5366983491745873e-06, "loss": 0.7907, "step": 882500 }, { "epoch": 0.95, "learning_rate": 3.5216908454227116e-06, "loss": 0.7876, "step": 883000 }, { "epoch": 0.95, "learning_rate": 3.506713356678339e-06, "loss": 0.7992, "step": 883500 }, { "epoch": 0.95, "learning_rate": 3.4917058529264635e-06, "loss": 0.7884, "step": 884000 }, { "epoch": 0.95, "learning_rate": 3.4766983491745874e-06, "loss": 0.7833, "step": 884500 }, { "epoch": 0.95, "learning_rate": 3.4616908454227118e-06, "loss": 0.7935, "step": 885000 }, { "epoch": 0.95, "learning_rate": 3.4467133566783393e-06, "loss": 0.7985, "step": 885500 }, { "epoch": 0.95, "learning_rate": 3.431705852926463e-06, "loss": 0.7935, "step": 886000 }, { "epoch": 0.95, "learning_rate": 3.416698349174587e-06, "loss": 0.7787, "step": 886500 }, { "epoch": 0.95, "learning_rate": 3.4016908454227115e-06, "loss": 0.7971, "step": 887000 }, { "epoch": 0.95, "learning_rate": 3.3867133566783394e-06, "loss": 0.7929, "step": 887500 }, { "epoch": 0.95, "learning_rate": 3.3717058529264634e-06, "loss": 0.7943, "step": 888000 }, { "epoch": 0.95, "learning_rate": 3.3566983491745877e-06, "loss": 0.7842, "step": 888500 }, { "epoch": 0.95, "learning_rate": 3.3416908454227116e-06, "loss": 0.7941, "step": 889000 }, { "epoch": 0.95, "learning_rate": 3.326713356678339e-06, "loss": 0.7939, "step": 889500 }, { "epoch": 0.95, "learning_rate": 3.311735867933967e-06, "loss": 0.7854, "step": 890000 }, { "epoch": 0.95, "eval_loss": 0.742018461227417, "eval_runtime": 599.0341, "eval_samples_per_second": 166.935, "eval_steps_per_second": 41.734, "step": 890000 }, { "epoch": 0.95, "learning_rate": 3.296728364182091e-06, "loss": 0.7893, "step": 890500 }, { "epoch": 0.96, "learning_rate": 3.281720860430215e-06, "loss": 0.803, "step": 891000 }, { "epoch": 0.96, "learning_rate": 3.2667133566783393e-06, "loss": 0.802, "step": 891500 }, { "epoch": 0.96, "learning_rate": 3.2517058529264632e-06, "loss": 0.7993, "step": 892000 }, { "epoch": 0.96, "learning_rate": 3.2366983491745876e-06, "loss": 0.8021, "step": 892500 }, { "epoch": 0.96, "learning_rate": 3.2216908454227115e-06, "loss": 0.7878, "step": 893000 }, { "epoch": 0.96, "learning_rate": 3.2066833416708354e-06, "loss": 0.781, "step": 893500 }, { "epoch": 0.96, "learning_rate": 3.1916758379189594e-06, "loss": 0.8049, "step": 894000 }, { "epoch": 0.96, "learning_rate": 3.1766683341670837e-06, "loss": 0.8089, "step": 894500 }, { "epoch": 0.96, "learning_rate": 3.1616608304152077e-06, "loss": 0.7842, "step": 895000 }, { "epoch": 0.96, "learning_rate": 3.1466533266633316e-06, "loss": 0.7958, "step": 895500 }, { "epoch": 0.96, "learning_rate": 3.131645822911456e-06, "loss": 0.7933, "step": 896000 }, { "epoch": 0.96, "learning_rate": 3.116668334167084e-06, "loss": 0.7816, "step": 896500 }, { "epoch": 0.96, "learning_rate": 3.101660830415208e-06, "loss": 0.798, "step": 897000 }, { "epoch": 0.96, "learning_rate": 3.0866833416708353e-06, "loss": 0.7837, "step": 897500 }, { "epoch": 0.96, "learning_rate": 3.0716758379189593e-06, "loss": 0.7781, "step": 898000 }, { "epoch": 0.96, "learning_rate": 3.0566683341670836e-06, "loss": 0.7966, "step": 898500 }, { "epoch": 0.96, "learning_rate": 3.0416608304152075e-06, "loss": 0.7896, "step": 899000 }, { "epoch": 0.96, "learning_rate": 3.026653326663332e-06, "loss": 0.776, "step": 899500 }, { "epoch": 0.97, "learning_rate": 3.011645822911456e-06, "loss": 0.7784, "step": 900000 }, { "epoch": 0.97, "eval_loss": 0.7412576675415039, "eval_runtime": 624.3312, "eval_samples_per_second": 160.171, "eval_steps_per_second": 40.043, "step": 900000 }, { "epoch": 0.97, "learning_rate": 2.99663831915958e-06, "loss": 0.7892, "step": 900500 }, { "epoch": 0.97, "learning_rate": 2.9816608304152077e-06, "loss": 0.7944, "step": 901000 }, { "epoch": 0.97, "learning_rate": 2.9666533266633316e-06, "loss": 0.7818, "step": 901500 }, { "epoch": 0.97, "learning_rate": 2.9516458229114555e-06, "loss": 0.7921, "step": 902000 }, { "epoch": 0.97, "learning_rate": 2.93663831915958e-06, "loss": 0.7927, "step": 902500 }, { "epoch": 0.97, "learning_rate": 2.921630815407704e-06, "loss": 0.7877, "step": 903000 }, { "epoch": 0.97, "learning_rate": 2.906623311655828e-06, "loss": 0.7997, "step": 903500 }, { "epoch": 0.97, "learning_rate": 2.8916458229114557e-06, "loss": 0.7847, "step": 904000 }, { "epoch": 0.97, "learning_rate": 2.87663831915958e-06, "loss": 0.7988, "step": 904500 }, { "epoch": 0.97, "learning_rate": 2.861630815407704e-06, "loss": 0.7929, "step": 905000 }, { "epoch": 0.97, "learning_rate": 2.846623311655828e-06, "loss": 0.7907, "step": 905500 }, { "epoch": 0.97, "learning_rate": 2.831615807903952e-06, "loss": 0.7932, "step": 906000 }, { "epoch": 0.97, "learning_rate": 2.816608304152076e-06, "loss": 0.7834, "step": 906500 }, { "epoch": 0.97, "learning_rate": 2.8016008004002e-06, "loss": 0.7859, "step": 907000 }, { "epoch": 0.97, "learning_rate": 2.7865932966483245e-06, "loss": 0.7873, "step": 907500 }, { "epoch": 0.97, "learning_rate": 2.771615807903952e-06, "loss": 0.786, "step": 908000 }, { "epoch": 0.97, "learning_rate": 2.7566083041520763e-06, "loss": 0.7941, "step": 908500 }, { "epoch": 0.97, "learning_rate": 2.7416308154077043e-06, "loss": 0.7962, "step": 909000 }, { "epoch": 0.98, "learning_rate": 2.726623311655828e-06, "loss": 0.7934, "step": 909500 }, { "epoch": 0.98, "learning_rate": 2.7116158079039517e-06, "loss": 0.7874, "step": 910000 }, { "epoch": 0.98, "eval_loss": 0.7403942942619324, "eval_runtime": 608.9146, "eval_samples_per_second": 164.227, "eval_steps_per_second": 41.057, "step": 910000 }, { "epoch": 0.98, "learning_rate": 2.6966383191595797e-06, "loss": 0.7744, "step": 910500 }, { "epoch": 0.98, "learning_rate": 2.681630815407704e-06, "loss": 0.8014, "step": 911000 }, { "epoch": 0.98, "learning_rate": 2.666623311655828e-06, "loss": 0.786, "step": 911500 }, { "epoch": 0.98, "learning_rate": 2.6516158079039523e-06, "loss": 0.7952, "step": 912000 }, { "epoch": 0.98, "learning_rate": 2.6366083041520762e-06, "loss": 0.7962, "step": 912500 }, { "epoch": 0.98, "learning_rate": 2.621630815407704e-06, "loss": 0.793, "step": 913000 }, { "epoch": 0.98, "learning_rate": 2.6066233116558277e-06, "loss": 0.7885, "step": 913500 }, { "epoch": 0.98, "learning_rate": 2.591615807903952e-06, "loss": 0.7883, "step": 914000 }, { "epoch": 0.98, "learning_rate": 2.576608304152076e-06, "loss": 0.7811, "step": 914500 }, { "epoch": 0.98, "learning_rate": 2.5616008004002003e-06, "loss": 0.7862, "step": 915000 }, { "epoch": 0.98, "learning_rate": 2.5465932966483242e-06, "loss": 0.7899, "step": 915500 }, { "epoch": 0.98, "learning_rate": 2.531585792896448e-06, "loss": 0.771, "step": 916000 }, { "epoch": 0.98, "learning_rate": 2.5165782891445725e-06, "loss": 0.7835, "step": 916500 }, { "epoch": 0.98, "learning_rate": 2.5015707853926965e-06, "loss": 0.779, "step": 917000 }, { "epoch": 0.98, "learning_rate": 2.486593296648324e-06, "loss": 0.7986, "step": 917500 }, { "epoch": 0.98, "learning_rate": 2.4715857928964483e-06, "loss": 0.7895, "step": 918000 }, { "epoch": 0.98, "learning_rate": 2.4565782891445723e-06, "loss": 0.7882, "step": 918500 }, { "epoch": 0.99, "learning_rate": 2.441570785392696e-06, "loss": 0.786, "step": 919000 }, { "epoch": 0.99, "learning_rate": 2.4265632816408205e-06, "loss": 0.7866, "step": 919500 }, { "epoch": 0.99, "learning_rate": 2.4115557778889445e-06, "loss": 0.7727, "step": 920000 }, { "epoch": 0.99, "eval_loss": 0.7387272715568542, "eval_runtime": 611.0966, "eval_samples_per_second": 163.64, "eval_steps_per_second": 40.91, "step": 920000 }, { "epoch": 0.99, "learning_rate": 2.3965782891445724e-06, "loss": 0.79, "step": 920500 }, { "epoch": 0.99, "learning_rate": 2.3815707853926968e-06, "loss": 0.7841, "step": 921000 }, { "epoch": 0.99, "learning_rate": 2.3665632816408203e-06, "loss": 0.797, "step": 921500 }, { "epoch": 0.99, "learning_rate": 2.351555777888944e-06, "loss": 0.7758, "step": 922000 }, { "epoch": 0.99, "learning_rate": 2.3365482741370685e-06, "loss": 0.7854, "step": 922500 }, { "epoch": 0.99, "learning_rate": 2.3215707853926965e-06, "loss": 0.7877, "step": 923000 }, { "epoch": 0.99, "learning_rate": 2.3065632816408204e-06, "loss": 0.787, "step": 923500 }, { "epoch": 0.99, "learning_rate": 2.2915557778889448e-06, "loss": 0.7823, "step": 924000 }, { "epoch": 0.99, "learning_rate": 2.2765482741370687e-06, "loss": 0.7831, "step": 924500 }, { "epoch": 0.99, "learning_rate": 2.261540770385193e-06, "loss": 0.7846, "step": 925000 }, { "epoch": 0.99, "learning_rate": 2.2465332666333166e-06, "loss": 0.7889, "step": 925500 }, { "epoch": 0.99, "learning_rate": 2.2315257628814405e-06, "loss": 0.7796, "step": 926000 }, { "epoch": 0.99, "learning_rate": 2.216518259129565e-06, "loss": 0.785, "step": 926500 }, { "epoch": 0.99, "learning_rate": 2.2015407703851928e-06, "loss": 0.8018, "step": 927000 }, { "epoch": 0.99, "learning_rate": 2.1865332666333167e-06, "loss": 0.787, "step": 927500 }, { "epoch": 1.0, "learning_rate": 2.171525762881441e-06, "loss": 0.7962, "step": 928000 }, { "epoch": 1.0, "learning_rate": 2.156518259129565e-06, "loss": 0.783, "step": 928500 }, { "epoch": 1.0, "learning_rate": 2.141540770385193e-06, "loss": 0.8057, "step": 929000 }, { "epoch": 1.0, "learning_rate": 2.1265332666333164e-06, "loss": 0.7904, "step": 929500 }, { "epoch": 1.0, "learning_rate": 2.111525762881441e-06, "loss": 0.7756, "step": 930000 }, { "epoch": 1.0, "eval_loss": 0.7374897003173828, "eval_runtime": 578.7073, "eval_samples_per_second": 172.799, "eval_steps_per_second": 43.2, "step": 930000 }, { "epoch": 1.0, "learning_rate": 2.0965182591295647e-06, "loss": 0.7893, "step": 930500 }, { "epoch": 1.0, "learning_rate": 2.081510755377689e-06, "loss": 0.7763, "step": 931000 }, { "epoch": 1.0, "learning_rate": 2.066503251625813e-06, "loss": 0.7817, "step": 931500 }, { "epoch": 1.0, "learning_rate": 2.051525762881441e-06, "loss": 0.7807, "step": 932000 }, { "epoch": 1.0, "learning_rate": 2.036518259129565e-06, "loss": 0.7849, "step": 932500 }, { "epoch": 1.0, "learning_rate": 2.0215107553776892e-06, "loss": 0.7735, "step": 933000 }, { "epoch": 1.0, "learning_rate": 2.0065032516258127e-06, "loss": 0.7658, "step": 933500 }, { "epoch": 1.0, "learning_rate": 1.991495747873937e-06, "loss": 0.7667, "step": 934000 }, { "epoch": 1.0, "learning_rate": 1.976488244122061e-06, "loss": 0.7655, "step": 934500 }, { "epoch": 1.0, "learning_rate": 1.961480740370185e-06, "loss": 0.7673, "step": 935000 }, { "epoch": 1.0, "learning_rate": 1.9464732366183093e-06, "loss": 0.7603, "step": 935500 }, { "epoch": 1.0, "learning_rate": 1.9314957478739372e-06, "loss": 0.7651, "step": 936000 }, { "epoch": 1.0, "learning_rate": 1.916488244122061e-06, "loss": 0.76, "step": 936500 }, { "epoch": 1.0, "learning_rate": 1.9014807403701853e-06, "loss": 0.762, "step": 937000 }, { "epoch": 1.01, "learning_rate": 1.8864732366183092e-06, "loss": 0.7732, "step": 937500 }, { "epoch": 1.01, "learning_rate": 1.8714657328664334e-06, "loss": 0.764, "step": 938000 }, { "epoch": 1.01, "learning_rate": 1.8564582291145573e-06, "loss": 0.7633, "step": 938500 }, { "epoch": 1.01, "learning_rate": 1.841480740370185e-06, "loss": 0.7701, "step": 939000 }, { "epoch": 1.01, "learning_rate": 1.8264732366183092e-06, "loss": 0.7654, "step": 939500 }, { "epoch": 1.01, "learning_rate": 1.8114657328664333e-06, "loss": 0.7588, "step": 940000 }, { "epoch": 1.01, "eval_loss": 0.737705409526825, "eval_runtime": 579.0479, "eval_samples_per_second": 172.697, "eval_steps_per_second": 43.174, "step": 940000 }, { "epoch": 1.01, "learning_rate": 1.7964582291145573e-06, "loss": 0.7784, "step": 940500 }, { "epoch": 1.01, "learning_rate": 1.7814507253626814e-06, "loss": 0.7521, "step": 941000 }, { "epoch": 1.01, "learning_rate": 1.7664432216108055e-06, "loss": 0.7575, "step": 941500 }, { "epoch": 1.01, "learning_rate": 1.7514357178589297e-06, "loss": 0.7786, "step": 942000 }, { "epoch": 1.01, "learning_rate": 1.7364582291145572e-06, "loss": 0.7631, "step": 942500 }, { "epoch": 1.01, "learning_rate": 1.7214507253626813e-06, "loss": 0.756, "step": 943000 }, { "epoch": 1.01, "learning_rate": 1.7064432216108055e-06, "loss": 0.7576, "step": 943500 }, { "epoch": 1.01, "learning_rate": 1.6914357178589296e-06, "loss": 0.7712, "step": 944000 }, { "epoch": 1.01, "learning_rate": 1.6764282141070535e-06, "loss": 0.7553, "step": 944500 }, { "epoch": 1.01, "learning_rate": 1.6614507253626813e-06, "loss": 0.7592, "step": 945000 }, { "epoch": 1.01, "learning_rate": 1.6464432216108054e-06, "loss": 0.7677, "step": 945500 }, { "epoch": 1.01, "learning_rate": 1.6314357178589296e-06, "loss": 0.7628, "step": 946000 }, { "epoch": 1.01, "learning_rate": 1.6164282141070535e-06, "loss": 0.7724, "step": 946500 }, { "epoch": 1.02, "learning_rate": 1.6014207103551776e-06, "loss": 0.7541, "step": 947000 }, { "epoch": 1.02, "learning_rate": 1.5864432216108056e-06, "loss": 0.7656, "step": 947500 }, { "epoch": 1.02, "learning_rate": 1.5714357178589295e-06, "loss": 0.7726, "step": 948000 }, { "epoch": 1.02, "learning_rate": 1.5564282141070534e-06, "loss": 0.755, "step": 948500 }, { "epoch": 1.02, "learning_rate": 1.5414207103551776e-06, "loss": 0.7555, "step": 949000 }, { "epoch": 1.02, "learning_rate": 1.5264132066033017e-06, "loss": 0.765, "step": 949500 }, { "epoch": 1.02, "learning_rate": 1.5114057028514259e-06, "loss": 0.7734, "step": 950000 }, { "epoch": 1.02, "eval_loss": 0.736987292766571, "eval_runtime": 575.6732, "eval_samples_per_second": 173.71, "eval_steps_per_second": 43.427, "step": 950000 }, { "epoch": 1.02, "learning_rate": 1.4963981990995498e-06, "loss": 0.7605, "step": 950500 }, { "epoch": 1.02, "learning_rate": 1.4814207103551775e-06, "loss": 0.7677, "step": 951000 }, { "epoch": 1.02, "learning_rate": 1.4664132066033016e-06, "loss": 0.764, "step": 951500 }, { "epoch": 1.02, "learning_rate": 1.4514057028514258e-06, "loss": 0.7535, "step": 952000 }, { "epoch": 1.02, "learning_rate": 1.4363981990995497e-06, "loss": 0.7746, "step": 952500 }, { "epoch": 1.02, "learning_rate": 1.4213906953476739e-06, "loss": 0.7642, "step": 953000 }, { "epoch": 1.02, "learning_rate": 1.406383191595798e-06, "loss": 0.7574, "step": 953500 }, { "epoch": 1.02, "learning_rate": 1.391375687843922e-06, "loss": 0.7556, "step": 954000 }, { "epoch": 1.02, "learning_rate": 1.3763981990995497e-06, "loss": 0.7684, "step": 954500 }, { "epoch": 1.02, "learning_rate": 1.3613906953476738e-06, "loss": 0.7678, "step": 955000 }, { "epoch": 1.02, "learning_rate": 1.346383191595798e-06, "loss": 0.7582, "step": 955500 }, { "epoch": 1.03, "learning_rate": 1.331375687843922e-06, "loss": 0.7699, "step": 956000 }, { "epoch": 1.03, "learning_rate": 1.316368184092046e-06, "loss": 0.7704, "step": 956500 }, { "epoch": 1.03, "learning_rate": 1.3013606803401702e-06, "loss": 0.7631, "step": 957000 }, { "epoch": 1.03, "learning_rate": 1.2863531765882943e-06, "loss": 0.7781, "step": 957500 }, { "epoch": 1.03, "learning_rate": 1.2713456728364182e-06, "loss": 0.7707, "step": 958000 }, { "epoch": 1.03, "learning_rate": 1.256368184092046e-06, "loss": 0.7599, "step": 958500 }, { "epoch": 1.03, "learning_rate": 1.24136068034017e-06, "loss": 0.7639, "step": 959000 }, { "epoch": 1.03, "learning_rate": 1.2263531765882942e-06, "loss": 0.7784, "step": 959500 }, { "epoch": 1.03, "learning_rate": 1.2113456728364182e-06, "loss": 0.7563, "step": 960000 }, { "epoch": 1.03, "eval_loss": 0.7364427447319031, "eval_runtime": 576.5734, "eval_samples_per_second": 173.438, "eval_steps_per_second": 43.36, "step": 960000 }, { "epoch": 1.03, "learning_rate": 1.196368184092046e-06, "loss": 0.7694, "step": 960500 }, { "epoch": 1.03, "learning_rate": 1.18136068034017e-06, "loss": 0.7588, "step": 961000 }, { "epoch": 1.03, "learning_rate": 1.1663531765882942e-06, "loss": 0.7441, "step": 961500 }, { "epoch": 1.03, "learning_rate": 1.1513756878439221e-06, "loss": 0.7665, "step": 962000 }, { "epoch": 1.03, "learning_rate": 1.136368184092046e-06, "loss": 0.7515, "step": 962500 }, { "epoch": 1.03, "learning_rate": 1.1213606803401702e-06, "loss": 0.7683, "step": 963000 }, { "epoch": 1.03, "learning_rate": 1.1063531765882941e-06, "loss": 0.7637, "step": 963500 }, { "epoch": 1.03, "learning_rate": 1.0913456728364183e-06, "loss": 0.7537, "step": 964000 }, { "epoch": 1.03, "learning_rate": 1.076368184092046e-06, "loss": 0.7667, "step": 964500 }, { "epoch": 1.03, "learning_rate": 1.0613606803401701e-06, "loss": 0.7663, "step": 965000 }, { "epoch": 1.04, "learning_rate": 1.0463531765882943e-06, "loss": 0.7604, "step": 965500 }, { "epoch": 1.04, "learning_rate": 1.0313456728364184e-06, "loss": 0.7534, "step": 966000 }, { "epoch": 1.04, "learning_rate": 1.0163381690845423e-06, "loss": 0.771, "step": 966500 }, { "epoch": 1.04, "learning_rate": 1.0013306653326663e-06, "loss": 0.7567, "step": 967000 }, { "epoch": 1.04, "learning_rate": 9.863531765882942e-07, "loss": 0.7632, "step": 967500 }, { "epoch": 1.04, "learning_rate": 9.713456728364183e-07, "loss": 0.7728, "step": 968000 }, { "epoch": 1.04, "learning_rate": 9.563381690845423e-07, "loss": 0.754, "step": 968500 }, { "epoch": 1.04, "learning_rate": 9.413306653326664e-07, "loss": 0.7629, "step": 969000 }, { "epoch": 1.04, "learning_rate": 9.263231615807905e-07, "loss": 0.7696, "step": 969500 }, { "epoch": 1.04, "learning_rate": 9.113156578289145e-07, "loss": 0.7753, "step": 970000 }, { "epoch": 1.04, "eval_loss": 0.7353302240371704, "eval_runtime": 645.1885, "eval_samples_per_second": 154.993, "eval_steps_per_second": 38.748, "step": 970000 }, { "epoch": 1.04, "learning_rate": 8.963081540770385e-07, "loss": 0.7666, "step": 970500 }, { "epoch": 1.04, "learning_rate": 8.813006503251626e-07, "loss": 0.7675, "step": 971000 }, { "epoch": 1.04, "learning_rate": 8.663231615807905e-07, "loss": 0.777, "step": 971500 }, { "epoch": 1.04, "learning_rate": 8.513156578289144e-07, "loss": 0.7657, "step": 972000 }, { "epoch": 1.04, "learning_rate": 8.363081540770386e-07, "loss": 0.7698, "step": 972500 }, { "epoch": 1.04, "learning_rate": 8.213006503251626e-07, "loss": 0.7571, "step": 973000 }, { "epoch": 1.04, "learning_rate": 8.063231615807904e-07, "loss": 0.7628, "step": 973500 }, { "epoch": 1.04, "learning_rate": 7.913156578289145e-07, "loss": 0.7672, "step": 974000 }, { "epoch": 1.05, "learning_rate": 7.763081540770386e-07, "loss": 0.7677, "step": 974500 }, { "epoch": 1.05, "learning_rate": 7.613306653326663e-07, "loss": 0.7624, "step": 975000 }, { "epoch": 1.05, "learning_rate": 7.463231615807905e-07, "loss": 0.7617, "step": 975500 }, { "epoch": 1.05, "learning_rate": 7.313156578289144e-07, "loss": 0.7627, "step": 976000 }, { "epoch": 1.05, "learning_rate": 7.163081540770386e-07, "loss": 0.7499, "step": 976500 }, { "epoch": 1.05, "learning_rate": 7.013006503251626e-07, "loss": 0.7753, "step": 977000 }, { "epoch": 1.05, "learning_rate": 6.862931465732867e-07, "loss": 0.7663, "step": 977500 }, { "epoch": 1.05, "learning_rate": 6.713156578289145e-07, "loss": 0.7773, "step": 978000 }, { "epoch": 1.05, "learning_rate": 6.563381690845423e-07, "loss": 0.7573, "step": 978500 }, { "epoch": 1.05, "learning_rate": 6.413306653326663e-07, "loss": 0.7679, "step": 979000 }, { "epoch": 1.05, "learning_rate": 6.263231615807905e-07, "loss": 0.7665, "step": 979500 }, { "epoch": 1.05, "learning_rate": 6.113156578289144e-07, "loss": 0.7651, "step": 980000 }, { "epoch": 1.05, "eval_loss": 0.7347660064697266, "eval_runtime": 596.5908, "eval_samples_per_second": 167.619, "eval_steps_per_second": 41.905, "step": 980000 }, { "epoch": 1.05, "learning_rate": 5.963081540770385e-07, "loss": 0.7661, "step": 980500 }, { "epoch": 1.05, "learning_rate": 5.813006503251626e-07, "loss": 0.7551, "step": 981000 }, { "epoch": 1.05, "learning_rate": 5.662931465732867e-07, "loss": 0.7647, "step": 981500 }, { "epoch": 1.05, "learning_rate": 5.512856428214107e-07, "loss": 0.7543, "step": 982000 }, { "epoch": 1.05, "learning_rate": 5.362781390695348e-07, "loss": 0.7666, "step": 982500 }, { "epoch": 1.05, "learning_rate": 5.212706353176588e-07, "loss": 0.7721, "step": 983000 }, { "epoch": 1.05, "learning_rate": 5.06263131565783e-07, "loss": 0.7605, "step": 983500 }, { "epoch": 1.06, "learning_rate": 4.912856428214107e-07, "loss": 0.7602, "step": 984000 }, { "epoch": 1.06, "learning_rate": 4.762781390695348e-07, "loss": 0.7553, "step": 984500 }, { "epoch": 1.06, "learning_rate": 4.612706353176588e-07, "loss": 0.7499, "step": 985000 }, { "epoch": 1.06, "learning_rate": 4.462631315657829e-07, "loss": 0.7592, "step": 985500 }, { "epoch": 1.06, "learning_rate": 4.3125562781390695e-07, "loss": 0.7653, "step": 986000 }, { "epoch": 1.06, "learning_rate": 4.1624812406203104e-07, "loss": 0.7705, "step": 986500 }, { "epoch": 1.06, "learning_rate": 4.012406203101551e-07, "loss": 0.7631, "step": 987000 }, { "epoch": 1.06, "learning_rate": 3.862631315657829e-07, "loss": 0.7501, "step": 987500 }, { "epoch": 1.06, "learning_rate": 3.7125562781390694e-07, "loss": 0.7564, "step": 988000 }, { "epoch": 1.06, "learning_rate": 3.5624812406203103e-07, "loss": 0.7495, "step": 988500 }, { "epoch": 1.06, "learning_rate": 3.4124062031015507e-07, "loss": 0.7605, "step": 989000 }, { "epoch": 1.06, "learning_rate": 3.2623311655827916e-07, "loss": 0.7698, "step": 989500 }, { "epoch": 1.06, "learning_rate": 3.1125562781390693e-07, "loss": 0.7526, "step": 990000 }, { "epoch": 1.06, "eval_loss": 0.7347068190574646, "eval_runtime": 582.4895, "eval_samples_per_second": 171.677, "eval_steps_per_second": 42.919, "step": 990000 }, { "epoch": 1.06, "learning_rate": 2.96248124062031e-07, "loss": 0.7615, "step": 990500 }, { "epoch": 1.06, "learning_rate": 2.8124062031015506e-07, "loss": 0.7677, "step": 991000 }, { "epoch": 1.06, "learning_rate": 2.6623311655827915e-07, "loss": 0.754, "step": 991500 }, { "epoch": 1.06, "learning_rate": 2.512256128064032e-07, "loss": 0.7632, "step": 992000 }, { "epoch": 1.06, "learning_rate": 2.3621810905452727e-07, "loss": 0.7677, "step": 992500 }, { "epoch": 1.06, "learning_rate": 2.2121060530265134e-07, "loss": 0.7546, "step": 993000 }, { "epoch": 1.07, "learning_rate": 2.0623311655827914e-07, "loss": 0.7602, "step": 993500 }, { "epoch": 1.07, "learning_rate": 1.9125562781390697e-07, "loss": 0.7498, "step": 994000 }, { "epoch": 1.07, "learning_rate": 1.7624812406203103e-07, "loss": 0.7587, "step": 994500 }, { "epoch": 1.07, "learning_rate": 1.612406203101551e-07, "loss": 0.7581, "step": 995000 }, { "epoch": 1.07, "learning_rate": 1.4623311655827916e-07, "loss": 0.7569, "step": 995500 }, { "epoch": 1.07, "learning_rate": 1.3122561280640322e-07, "loss": 0.7696, "step": 996000 }, { "epoch": 1.07, "learning_rate": 1.1621810905452727e-07, "loss": 0.7511, "step": 996500 }, { "epoch": 1.07, "learning_rate": 1.0121060530265133e-07, "loss": 0.7672, "step": 997000 }, { "epoch": 1.07, "learning_rate": 8.620310155077538e-08, "loss": 0.7461, "step": 997500 }, { "epoch": 1.07, "learning_rate": 7.119559779889944e-08, "loss": 0.7562, "step": 998000 }, { "epoch": 1.07, "learning_rate": 5.6188094047023514e-08, "loss": 0.7609, "step": 998500 }, { "epoch": 1.07, "learning_rate": 4.1210605302651325e-08, "loss": 0.7588, "step": 999000 }, { "epoch": 1.07, "learning_rate": 2.6203101550775387e-08, "loss": 0.7513, "step": 999500 }, { "epoch": 1.07, "learning_rate": 1.119559779889945e-08, "loss": 0.755, "step": 1000000 }, { "epoch": 1.07, "eval_loss": 0.7345585823059082, "eval_runtime": 579.2567, "eval_samples_per_second": 172.635, "eval_steps_per_second": 43.159, "step": 1000000 } ], "max_steps": 1000000, "num_train_epochs": 2, "total_flos": 2.242924955382055e+18, "trial_name": null, "trial_params": null }