{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995835068721366, "global_step": 168000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9940500981733805e-05, "loss": 1.1973, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.9881001963467605e-05, "loss": 1.2168, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.9821502945201405e-05, "loss": 1.1454, "step": 1500 }, { "epoch": 0.01, "learning_rate": 1.9762003926935208e-05, "loss": 1.1647, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.9702504908669008e-05, "loss": 1.1795, "step": 2500 }, { "epoch": 0.02, "learning_rate": 1.964300589040281e-05, "loss": 1.1723, "step": 3000 }, { "epoch": 0.02, "learning_rate": 1.958350687213661e-05, "loss": 1.157, "step": 3500 }, { "epoch": 0.02, "learning_rate": 1.952400785387041e-05, "loss": 1.1699, "step": 4000 }, { "epoch": 0.03, "learning_rate": 1.9464508835604215e-05, "loss": 1.1593, "step": 4500 }, { "epoch": 0.03, "learning_rate": 1.9405009817338018e-05, "loss": 1.1421, "step": 5000 }, { "epoch": 0.03, "learning_rate": 1.9345510799071814e-05, "loss": 1.0648, "step": 5500 }, { "epoch": 0.04, "learning_rate": 1.9286011780805618e-05, "loss": 1.1345, "step": 6000 }, { "epoch": 0.04, "learning_rate": 1.922651276253942e-05, "loss": 1.1722, "step": 6500 }, { "epoch": 0.04, "learning_rate": 1.916701374427322e-05, "loss": 1.1588, "step": 7000 }, { "epoch": 0.04, "learning_rate": 1.910751472600702e-05, "loss": 1.1502, "step": 7500 }, { "epoch": 0.05, "learning_rate": 1.9048015707740824e-05, "loss": 1.1297, "step": 8000 }, { "epoch": 0.05, "learning_rate": 1.8988516689474624e-05, "loss": 1.1478, "step": 8500 }, { "epoch": 0.05, "learning_rate": 1.8929017671208427e-05, "loss": 1.1025, "step": 9000 }, { "epoch": 0.06, "learning_rate": 1.8869518652942227e-05, "loss": 1.126, "step": 9500 }, { "epoch": 0.06, "learning_rate": 1.8810019634676027e-05, "loss": 1.1438, "step": 10000 }, { "epoch": 0.06, "learning_rate": 1.875052061640983e-05, "loss": 1.1064, "step": 10500 }, { "epoch": 0.07, "learning_rate": 1.8691021598143634e-05, "loss": 1.1369, "step": 11000 }, { "epoch": 0.07, "learning_rate": 1.8631522579877434e-05, "loss": 1.1036, "step": 11500 }, { "epoch": 0.07, "learning_rate": 1.8572023561611234e-05, "loss": 1.0771, "step": 12000 }, { "epoch": 0.07, "learning_rate": 1.8512524543345037e-05, "loss": 1.0841, "step": 12500 }, { "epoch": 0.08, "learning_rate": 1.8453025525078837e-05, "loss": 1.0818, "step": 13000 }, { "epoch": 0.08, "learning_rate": 1.839352650681264e-05, "loss": 1.1462, "step": 13500 }, { "epoch": 0.08, "learning_rate": 1.833402748854644e-05, "loss": 1.1335, "step": 14000 }, { "epoch": 0.09, "learning_rate": 1.827452847028024e-05, "loss": 1.079, "step": 14500 }, { "epoch": 0.09, "learning_rate": 1.8215029452014044e-05, "loss": 1.1276, "step": 15000 }, { "epoch": 0.09, "learning_rate": 1.8155530433747847e-05, "loss": 1.1119, "step": 15500 }, { "epoch": 0.1, "learning_rate": 1.8096031415481647e-05, "loss": 1.1343, "step": 16000 }, { "epoch": 0.1, "learning_rate": 1.8036532397215447e-05, "loss": 1.0787, "step": 16500 }, { "epoch": 0.1, "learning_rate": 1.797703337894925e-05, "loss": 1.1122, "step": 17000 }, { "epoch": 0.1, "learning_rate": 1.791753436068305e-05, "loss": 1.0904, "step": 17500 }, { "epoch": 0.11, "learning_rate": 1.785803534241685e-05, "loss": 1.0882, "step": 18000 }, { "epoch": 0.11, "learning_rate": 1.7798536324150653e-05, "loss": 1.1311, "step": 18500 }, { "epoch": 0.11, "learning_rate": 1.7739037305884453e-05, "loss": 1.0936, "step": 19000 }, { "epoch": 0.12, "learning_rate": 1.7679538287618257e-05, "loss": 1.0959, "step": 19500 }, { "epoch": 0.12, "learning_rate": 1.7620039269352056e-05, "loss": 1.0879, "step": 20000 }, { "epoch": 0.12, "learning_rate": 1.7560540251085856e-05, "loss": 1.0763, "step": 20500 }, { "epoch": 0.12, "learning_rate": 1.750104123281966e-05, "loss": 1.0713, "step": 21000 }, { "epoch": 0.13, "learning_rate": 1.7441542214553463e-05, "loss": 1.1204, "step": 21500 }, { "epoch": 0.13, "learning_rate": 1.7382043196287263e-05, "loss": 1.0808, "step": 22000 }, { "epoch": 0.13, "learning_rate": 1.7322544178021063e-05, "loss": 1.1301, "step": 22500 }, { "epoch": 0.14, "learning_rate": 1.7263045159754866e-05, "loss": 1.1141, "step": 23000 }, { "epoch": 0.14, "learning_rate": 1.7203546141488666e-05, "loss": 1.109, "step": 23500 }, { "epoch": 0.14, "learning_rate": 1.714404712322247e-05, "loss": 1.0691, "step": 24000 }, { "epoch": 0.15, "learning_rate": 1.708454810495627e-05, "loss": 1.0805, "step": 24500 }, { "epoch": 0.15, "learning_rate": 1.702504908669007e-05, "loss": 1.0795, "step": 25000 }, { "epoch": 0.15, "learning_rate": 1.6965550068423873e-05, "loss": 1.0431, "step": 25500 }, { "epoch": 0.15, "learning_rate": 1.6906051050157676e-05, "loss": 1.0553, "step": 26000 }, { "epoch": 0.16, "learning_rate": 1.6846552031891476e-05, "loss": 1.0786, "step": 26500 }, { "epoch": 0.16, "learning_rate": 1.6787053013625276e-05, "loss": 1.1008, "step": 27000 }, { "epoch": 0.16, "learning_rate": 1.672755399535908e-05, "loss": 1.0776, "step": 27500 }, { "epoch": 0.17, "learning_rate": 1.666805497709288e-05, "loss": 1.0993, "step": 28000 }, { "epoch": 0.17, "learning_rate": 1.660855595882668e-05, "loss": 1.1101, "step": 28500 }, { "epoch": 0.17, "learning_rate": 1.6549056940560482e-05, "loss": 1.0791, "step": 29000 }, { "epoch": 0.18, "learning_rate": 1.6489557922294282e-05, "loss": 1.0705, "step": 29500 }, { "epoch": 0.18, "learning_rate": 1.6430058904028086e-05, "loss": 1.0618, "step": 30000 }, { "epoch": 0.18, "learning_rate": 1.6370559885761885e-05, "loss": 1.0408, "step": 30500 }, { "epoch": 0.18, "learning_rate": 1.631106086749569e-05, "loss": 1.1367, "step": 31000 }, { "epoch": 0.19, "learning_rate": 1.625156184922949e-05, "loss": 1.0727, "step": 31500 }, { "epoch": 0.19, "learning_rate": 1.6192062830963292e-05, "loss": 1.0444, "step": 32000 }, { "epoch": 0.19, "learning_rate": 1.6132563812697092e-05, "loss": 1.0723, "step": 32500 }, { "epoch": 0.2, "learning_rate": 1.6073064794430892e-05, "loss": 1.0627, "step": 33000 }, { "epoch": 0.2, "learning_rate": 1.6013565776164695e-05, "loss": 1.0835, "step": 33500 }, { "epoch": 0.2, "learning_rate": 1.5954066757898495e-05, "loss": 1.1624, "step": 34000 }, { "epoch": 0.21, "learning_rate": 1.58945677396323e-05, "loss": 1.0383, "step": 34500 }, { "epoch": 0.21, "learning_rate": 1.58350687213661e-05, "loss": 1.0805, "step": 35000 }, { "epoch": 0.21, "learning_rate": 1.57755697030999e-05, "loss": 1.0426, "step": 35500 }, { "epoch": 0.21, "learning_rate": 1.57160706848337e-05, "loss": 1.1281, "step": 36000 }, { "epoch": 0.22, "learning_rate": 1.5656571666567505e-05, "loss": 1.0695, "step": 36500 }, { "epoch": 0.22, "learning_rate": 1.5597072648301305e-05, "loss": 1.0617, "step": 37000 }, { "epoch": 0.22, "learning_rate": 1.5537573630035105e-05, "loss": 1.0849, "step": 37500 }, { "epoch": 0.23, "learning_rate": 1.5478074611768908e-05, "loss": 1.0923, "step": 38000 }, { "epoch": 0.23, "learning_rate": 1.5418575593502708e-05, "loss": 1.1421, "step": 38500 }, { "epoch": 0.23, "learning_rate": 1.5359076575236508e-05, "loss": 1.0981, "step": 39000 }, { "epoch": 0.24, "learning_rate": 1.529957755697031e-05, "loss": 1.0991, "step": 39500 }, { "epoch": 0.24, "learning_rate": 1.5240078538704113e-05, "loss": 1.0568, "step": 40000 }, { "epoch": 0.24, "learning_rate": 1.5180579520437915e-05, "loss": 1.1249, "step": 40500 }, { "epoch": 0.24, "learning_rate": 1.5121080502171714e-05, "loss": 1.0764, "step": 41000 }, { "epoch": 0.25, "learning_rate": 1.5061581483905516e-05, "loss": 1.0582, "step": 41500 }, { "epoch": 0.25, "learning_rate": 1.5002082465639318e-05, "loss": 1.0795, "step": 42000 }, { "epoch": 0.25, "learning_rate": 1.494258344737312e-05, "loss": 1.0514, "step": 42500 }, { "epoch": 0.26, "learning_rate": 1.488308442910692e-05, "loss": 1.0427, "step": 43000 }, { "epoch": 0.26, "learning_rate": 1.4823585410840723e-05, "loss": 1.0842, "step": 43500 }, { "epoch": 0.26, "learning_rate": 1.4764086392574524e-05, "loss": 1.0961, "step": 44000 }, { "epoch": 0.26, "learning_rate": 1.4704587374308326e-05, "loss": 1.1226, "step": 44500 }, { "epoch": 0.27, "learning_rate": 1.4645088356042127e-05, "loss": 1.0607, "step": 45000 }, { "epoch": 0.27, "learning_rate": 1.4585589337775927e-05, "loss": 1.0479, "step": 45500 }, { "epoch": 0.27, "learning_rate": 1.4526090319509729e-05, "loss": 1.0572, "step": 46000 }, { "epoch": 0.28, "learning_rate": 1.446659130124353e-05, "loss": 1.1465, "step": 46500 }, { "epoch": 0.28, "learning_rate": 1.4407092282977332e-05, "loss": 1.041, "step": 47000 }, { "epoch": 0.28, "learning_rate": 1.4347593264711132e-05, "loss": 1.0366, "step": 47500 }, { "epoch": 0.29, "learning_rate": 1.4288094246444934e-05, "loss": 1.0661, "step": 48000 }, { "epoch": 0.29, "learning_rate": 1.4228595228178737e-05, "loss": 1.0495, "step": 48500 }, { "epoch": 0.29, "learning_rate": 1.4169096209912539e-05, "loss": 1.0769, "step": 49000 }, { "epoch": 0.29, "learning_rate": 1.4109597191646339e-05, "loss": 1.0329, "step": 49500 }, { "epoch": 0.3, "learning_rate": 1.405009817338014e-05, "loss": 1.0702, "step": 50000 }, { "epoch": 0.3, "learning_rate": 1.3990599155113942e-05, "loss": 1.0354, "step": 50500 }, { "epoch": 0.3, "learning_rate": 1.3931100136847744e-05, "loss": 1.0817, "step": 51000 }, { "epoch": 0.31, "learning_rate": 1.3871601118581544e-05, "loss": 1.0362, "step": 51500 }, { "epoch": 0.31, "learning_rate": 1.3812102100315345e-05, "loss": 1.0761, "step": 52000 }, { "epoch": 0.31, "learning_rate": 1.3752603082049147e-05, "loss": 1.0295, "step": 52500 }, { "epoch": 0.32, "learning_rate": 1.369310406378295e-05, "loss": 1.0365, "step": 53000 }, { "epoch": 0.32, "learning_rate": 1.3633605045516748e-05, "loss": 1.0378, "step": 53500 }, { "epoch": 0.32, "learning_rate": 1.3574106027250552e-05, "loss": 1.0172, "step": 54000 }, { "epoch": 0.32, "learning_rate": 1.3514607008984353e-05, "loss": 1.0553, "step": 54500 }, { "epoch": 0.33, "learning_rate": 1.3455107990718155e-05, "loss": 1.0595, "step": 55000 }, { "epoch": 0.33, "learning_rate": 1.3395608972451957e-05, "loss": 1.0443, "step": 55500 }, { "epoch": 0.33, "learning_rate": 1.3336109954185756e-05, "loss": 1.0359, "step": 56000 }, { "epoch": 0.34, "learning_rate": 1.3276610935919558e-05, "loss": 1.0631, "step": 56500 }, { "epoch": 0.34, "learning_rate": 1.321711191765336e-05, "loss": 1.0293, "step": 57000 }, { "epoch": 0.34, "learning_rate": 1.3157612899387161e-05, "loss": 1.0878, "step": 57500 }, { "epoch": 0.35, "learning_rate": 1.3098113881120961e-05, "loss": 1.054, "step": 58000 }, { "epoch": 0.35, "learning_rate": 1.3038614862854765e-05, "loss": 1.0523, "step": 58500 }, { "epoch": 0.35, "learning_rate": 1.2979115844588566e-05, "loss": 1.0117, "step": 59000 }, { "epoch": 0.35, "learning_rate": 1.2919616826322368e-05, "loss": 1.0612, "step": 59500 }, { "epoch": 0.36, "learning_rate": 1.2860117808056168e-05, "loss": 1.0106, "step": 60000 }, { "epoch": 0.36, "learning_rate": 1.280061878978997e-05, "loss": 1.026, "step": 60500 }, { "epoch": 0.36, "learning_rate": 1.2741119771523771e-05, "loss": 1.0533, "step": 61000 }, { "epoch": 0.37, "learning_rate": 1.2681620753257573e-05, "loss": 1.0651, "step": 61500 }, { "epoch": 0.37, "learning_rate": 1.2622121734991373e-05, "loss": 1.0717, "step": 62000 }, { "epoch": 0.37, "learning_rate": 1.2562622716725174e-05, "loss": 1.0679, "step": 62500 }, { "epoch": 0.37, "learning_rate": 1.2503123698458976e-05, "loss": 1.0594, "step": 63000 }, { "epoch": 0.38, "learning_rate": 1.2443624680192779e-05, "loss": 1.0457, "step": 63500 }, { "epoch": 0.38, "learning_rate": 1.2384125661926577e-05, "loss": 1.0491, "step": 64000 }, { "epoch": 0.38, "learning_rate": 1.232462664366038e-05, "loss": 1.0212, "step": 64500 }, { "epoch": 0.39, "learning_rate": 1.2265127625394182e-05, "loss": 1.0079, "step": 65000 }, { "epoch": 0.39, "learning_rate": 1.2205628607127984e-05, "loss": 1.0513, "step": 65500 }, { "epoch": 0.39, "learning_rate": 1.2146129588861786e-05, "loss": 1.0651, "step": 66000 }, { "epoch": 0.4, "learning_rate": 1.2086630570595585e-05, "loss": 1.1196, "step": 66500 }, { "epoch": 0.4, "learning_rate": 1.2027131552329387e-05, "loss": 1.0522, "step": 67000 }, { "epoch": 0.4, "learning_rate": 1.1967632534063189e-05, "loss": 1.0775, "step": 67500 }, { "epoch": 0.4, "learning_rate": 1.1908133515796992e-05, "loss": 1.0494, "step": 68000 }, { "epoch": 0.41, "learning_rate": 1.184863449753079e-05, "loss": 1.0312, "step": 68500 }, { "epoch": 0.41, "learning_rate": 1.1789135479264594e-05, "loss": 0.9866, "step": 69000 }, { "epoch": 0.41, "learning_rate": 1.1729636460998395e-05, "loss": 1.0072, "step": 69500 }, { "epoch": 0.42, "learning_rate": 1.1670137442732197e-05, "loss": 1.0145, "step": 70000 }, { "epoch": 0.42, "learning_rate": 1.1610638424465997e-05, "loss": 1.0334, "step": 70500 }, { "epoch": 0.42, "learning_rate": 1.1551139406199798e-05, "loss": 1.1029, "step": 71000 }, { "epoch": 0.43, "learning_rate": 1.14916403879336e-05, "loss": 1.0556, "step": 71500 }, { "epoch": 0.43, "learning_rate": 1.1432141369667402e-05, "loss": 1.0046, "step": 72000 }, { "epoch": 0.43, "learning_rate": 1.1372642351401202e-05, "loss": 1.0827, "step": 72500 }, { "epoch": 0.43, "learning_rate": 1.1313143333135003e-05, "loss": 1.0392, "step": 73000 }, { "epoch": 0.44, "learning_rate": 1.1253644314868807e-05, "loss": 1.0635, "step": 73500 }, { "epoch": 0.44, "learning_rate": 1.1194145296602608e-05, "loss": 1.0246, "step": 74000 }, { "epoch": 0.44, "learning_rate": 1.1134646278336408e-05, "loss": 1.0271, "step": 74500 }, { "epoch": 0.45, "learning_rate": 1.107514726007021e-05, "loss": 0.9673, "step": 75000 }, { "epoch": 0.45, "learning_rate": 1.1015648241804011e-05, "loss": 1.0277, "step": 75500 }, { "epoch": 0.45, "learning_rate": 1.0956149223537813e-05, "loss": 1.0755, "step": 76000 }, { "epoch": 0.46, "learning_rate": 1.0896650205271615e-05, "loss": 1.017, "step": 76500 }, { "epoch": 0.46, "learning_rate": 1.0837151187005414e-05, "loss": 1.012, "step": 77000 }, { "epoch": 0.46, "learning_rate": 1.0777652168739216e-05, "loss": 1.0078, "step": 77500 }, { "epoch": 0.46, "learning_rate": 1.0718153150473018e-05, "loss": 1.0136, "step": 78000 }, { "epoch": 0.47, "learning_rate": 1.0658654132206821e-05, "loss": 1.0571, "step": 78500 }, { "epoch": 0.47, "learning_rate": 1.059915511394062e-05, "loss": 1.0558, "step": 79000 }, { "epoch": 0.47, "learning_rate": 1.0539656095674423e-05, "loss": 0.9997, "step": 79500 }, { "epoch": 0.48, "learning_rate": 1.0480157077408224e-05, "loss": 1.0411, "step": 80000 }, { "epoch": 0.48, "learning_rate": 1.0420658059142026e-05, "loss": 1.0569, "step": 80500 }, { "epoch": 0.48, "learning_rate": 1.0361159040875826e-05, "loss": 1.0182, "step": 81000 }, { "epoch": 0.48, "learning_rate": 1.0301660022609627e-05, "loss": 1.028, "step": 81500 }, { "epoch": 0.49, "learning_rate": 1.0242161004343429e-05, "loss": 1.0548, "step": 82000 }, { "epoch": 0.49, "learning_rate": 1.018266198607723e-05, "loss": 1.0717, "step": 82500 }, { "epoch": 0.49, "learning_rate": 1.012316296781103e-05, "loss": 1.0518, "step": 83000 }, { "epoch": 0.5, "learning_rate": 1.0063663949544832e-05, "loss": 1.025, "step": 83500 }, { "epoch": 0.5, "learning_rate": 1.0004164931278636e-05, "loss": 1.0131, "step": 84000 }, { "epoch": 0.5, "learning_rate": 9.944665913012435e-06, "loss": 1.0403, "step": 84500 }, { "epoch": 0.51, "learning_rate": 9.885166894746237e-06, "loss": 0.9678, "step": 85000 }, { "epoch": 0.51, "learning_rate": 9.825667876480039e-06, "loss": 1.0123, "step": 85500 }, { "epoch": 0.51, "learning_rate": 9.76616885821384e-06, "loss": 1.0033, "step": 86000 }, { "epoch": 0.51, "learning_rate": 9.706669839947642e-06, "loss": 1.0143, "step": 86500 }, { "epoch": 0.52, "learning_rate": 9.647170821681444e-06, "loss": 1.0296, "step": 87000 }, { "epoch": 0.52, "learning_rate": 9.587671803415245e-06, "loss": 1.0152, "step": 87500 }, { "epoch": 0.52, "learning_rate": 9.528172785149045e-06, "loss": 1.0504, "step": 88000 }, { "epoch": 0.53, "learning_rate": 9.468673766882848e-06, "loss": 1.0308, "step": 88500 }, { "epoch": 0.53, "learning_rate": 9.409174748616648e-06, "loss": 1.0092, "step": 89000 }, { "epoch": 0.53, "learning_rate": 9.34967573035045e-06, "loss": 1.0669, "step": 89500 }, { "epoch": 0.54, "learning_rate": 9.290176712084252e-06, "loss": 1.0227, "step": 90000 }, { "epoch": 0.54, "learning_rate": 9.230677693818053e-06, "loss": 1.0142, "step": 90500 }, { "epoch": 0.54, "learning_rate": 9.171178675551853e-06, "loss": 1.073, "step": 91000 }, { "epoch": 0.54, "learning_rate": 9.111679657285657e-06, "loss": 1.0564, "step": 91500 }, { "epoch": 0.55, "learning_rate": 9.052180639019456e-06, "loss": 1.0278, "step": 92000 }, { "epoch": 0.55, "learning_rate": 8.992681620753258e-06, "loss": 1.0213, "step": 92500 }, { "epoch": 0.55, "learning_rate": 8.93318260248706e-06, "loss": 1.0354, "step": 93000 }, { "epoch": 0.56, "learning_rate": 8.873683584220861e-06, "loss": 1.0315, "step": 93500 }, { "epoch": 0.56, "learning_rate": 8.814184565954663e-06, "loss": 1.0585, "step": 94000 }, { "epoch": 0.56, "learning_rate": 8.754685547688465e-06, "loss": 1.0345, "step": 94500 }, { "epoch": 0.57, "learning_rate": 8.695186529422264e-06, "loss": 1.0612, "step": 95000 }, { "epoch": 0.57, "learning_rate": 8.635687511156066e-06, "loss": 1.0734, "step": 95500 }, { "epoch": 0.57, "learning_rate": 8.576188492889868e-06, "loss": 1.0822, "step": 96000 }, { "epoch": 0.57, "learning_rate": 8.51668947462367e-06, "loss": 1.0095, "step": 96500 }, { "epoch": 0.58, "learning_rate": 8.457190456357471e-06, "loss": 1.0104, "step": 97000 }, { "epoch": 0.58, "learning_rate": 8.397691438091273e-06, "loss": 1.0637, "step": 97500 }, { "epoch": 0.58, "learning_rate": 8.338192419825074e-06, "loss": 1.0427, "step": 98000 }, { "epoch": 0.59, "learning_rate": 8.278693401558874e-06, "loss": 1.0073, "step": 98500 }, { "epoch": 0.59, "learning_rate": 8.219194383292677e-06, "loss": 0.9647, "step": 99000 }, { "epoch": 0.59, "learning_rate": 8.159695365026477e-06, "loss": 1.0151, "step": 99500 }, { "epoch": 0.59, "learning_rate": 8.100196346760279e-06, "loss": 1.0446, "step": 100000 }, { "epoch": 0.6, "learning_rate": 8.04069732849408e-06, "loss": 1.0598, "step": 100500 }, { "epoch": 0.6, "learning_rate": 7.981198310227882e-06, "loss": 1.0044, "step": 101000 }, { "epoch": 0.6, "learning_rate": 7.921699291961684e-06, "loss": 1.0272, "step": 101500 }, { "epoch": 0.61, "learning_rate": 7.862200273695486e-06, "loss": 1.009, "step": 102000 }, { "epoch": 0.61, "learning_rate": 7.802701255429285e-06, "loss": 1.0505, "step": 102500 }, { "epoch": 0.61, "learning_rate": 7.743202237163087e-06, "loss": 1.0559, "step": 103000 }, { "epoch": 0.62, "learning_rate": 7.683703218896889e-06, "loss": 1.0412, "step": 103500 }, { "epoch": 0.62, "learning_rate": 7.62420420063069e-06, "loss": 1.0651, "step": 104000 }, { "epoch": 0.62, "learning_rate": 7.564705182364491e-06, "loss": 0.9968, "step": 104500 }, { "epoch": 0.62, "learning_rate": 7.505206164098294e-06, "loss": 1.0395, "step": 105000 }, { "epoch": 0.63, "learning_rate": 7.445707145832094e-06, "loss": 1.0378, "step": 105500 }, { "epoch": 0.63, "learning_rate": 7.386208127565896e-06, "loss": 0.99, "step": 106000 }, { "epoch": 0.63, "learning_rate": 7.326709109299697e-06, "loss": 0.9792, "step": 106500 }, { "epoch": 0.64, "learning_rate": 7.267210091033498e-06, "loss": 1.0482, "step": 107000 }, { "epoch": 0.64, "learning_rate": 7.207711072767299e-06, "loss": 1.0143, "step": 107500 }, { "epoch": 0.64, "learning_rate": 7.148212054501102e-06, "loss": 1.0321, "step": 108000 }, { "epoch": 0.65, "learning_rate": 7.088713036234903e-06, "loss": 1.0312, "step": 108500 }, { "epoch": 0.65, "learning_rate": 7.029214017968704e-06, "loss": 1.022, "step": 109000 }, { "epoch": 0.65, "learning_rate": 6.969714999702506e-06, "loss": 1.0529, "step": 109500 }, { "epoch": 0.65, "learning_rate": 6.9102159814363064e-06, "loss": 1.0269, "step": 110000 }, { "epoch": 0.66, "learning_rate": 6.850716963170109e-06, "loss": 0.9678, "step": 110500 }, { "epoch": 0.66, "learning_rate": 6.79121794490391e-06, "loss": 1.0315, "step": 111000 }, { "epoch": 0.66, "learning_rate": 6.731718926637711e-06, "loss": 0.9849, "step": 111500 }, { "epoch": 0.67, "learning_rate": 6.672219908371512e-06, "loss": 1.0397, "step": 112000 }, { "epoch": 0.67, "learning_rate": 6.6127208901053146e-06, "loss": 1.0511, "step": 112500 }, { "epoch": 0.67, "learning_rate": 6.553221871839115e-06, "loss": 1.0381, "step": 113000 }, { "epoch": 0.68, "learning_rate": 6.493722853572917e-06, "loss": 1.0584, "step": 113500 }, { "epoch": 0.68, "learning_rate": 6.434223835306718e-06, "loss": 0.9933, "step": 114000 }, { "epoch": 0.68, "learning_rate": 6.374724817040519e-06, "loss": 1.0439, "step": 114500 }, { "epoch": 0.68, "learning_rate": 6.31522579877432e-06, "loss": 1.047, "step": 115000 }, { "epoch": 0.69, "learning_rate": 6.255726780508123e-06, "loss": 1.0349, "step": 115500 }, { "epoch": 0.69, "learning_rate": 6.196227762241923e-06, "loss": 0.9923, "step": 116000 }, { "epoch": 0.69, "learning_rate": 6.136728743975725e-06, "loss": 0.9953, "step": 116500 }, { "epoch": 0.7, "learning_rate": 6.077229725709526e-06, "loss": 1.0083, "step": 117000 }, { "epoch": 0.7, "learning_rate": 6.017730707443327e-06, "loss": 1.0258, "step": 117500 }, { "epoch": 0.7, "learning_rate": 5.958231689177128e-06, "loss": 0.9729, "step": 118000 }, { "epoch": 0.71, "learning_rate": 5.898732670910931e-06, "loss": 1.0351, "step": 118500 }, { "epoch": 0.71, "learning_rate": 5.8392336526447314e-06, "loss": 1.0298, "step": 119000 }, { "epoch": 0.71, "learning_rate": 5.779734634378533e-06, "loss": 1.0067, "step": 119500 }, { "epoch": 0.71, "learning_rate": 5.7202356161123355e-06, "loss": 1.0235, "step": 120000 }, { "epoch": 0.72, "learning_rate": 5.660736597846136e-06, "loss": 1.0018, "step": 120500 }, { "epoch": 0.72, "learning_rate": 5.601237579579938e-06, "loss": 1.0096, "step": 121000 }, { "epoch": 0.72, "learning_rate": 5.541738561313739e-06, "loss": 0.9732, "step": 121500 }, { "epoch": 0.73, "learning_rate": 5.48223954304754e-06, "loss": 1.0359, "step": 122000 }, { "epoch": 0.73, "learning_rate": 5.422740524781341e-06, "loss": 0.9913, "step": 122500 }, { "epoch": 0.73, "learning_rate": 5.363241506515144e-06, "loss": 1.0132, "step": 123000 }, { "epoch": 0.73, "learning_rate": 5.303742488248944e-06, "loss": 1.0526, "step": 123500 }, { "epoch": 0.74, "learning_rate": 5.244243469982746e-06, "loss": 0.9742, "step": 124000 }, { "epoch": 0.74, "learning_rate": 5.184744451716547e-06, "loss": 0.9988, "step": 124500 }, { "epoch": 0.74, "learning_rate": 5.125245433450348e-06, "loss": 0.9938, "step": 125000 }, { "epoch": 0.75, "learning_rate": 5.065746415184149e-06, "loss": 1.022, "step": 125500 }, { "epoch": 0.75, "learning_rate": 5.006247396917952e-06, "loss": 1.0214, "step": 126000 }, { "epoch": 0.75, "learning_rate": 4.946748378651752e-06, "loss": 0.9758, "step": 126500 }, { "epoch": 0.76, "learning_rate": 4.887249360385554e-06, "loss": 1.0235, "step": 127000 }, { "epoch": 0.76, "learning_rate": 4.827750342119356e-06, "loss": 1.0387, "step": 127500 }, { "epoch": 0.76, "learning_rate": 4.768251323853157e-06, "loss": 1.0319, "step": 128000 }, { "epoch": 0.76, "learning_rate": 4.708752305586958e-06, "loss": 0.9961, "step": 128500 }, { "epoch": 0.77, "learning_rate": 4.64925328732076e-06, "loss": 1.0318, "step": 129000 }, { "epoch": 0.77, "learning_rate": 4.589754269054561e-06, "loss": 1.1137, "step": 129500 }, { "epoch": 0.77, "learning_rate": 4.530255250788362e-06, "loss": 1.0398, "step": 130000 }, { "epoch": 0.78, "learning_rate": 4.470756232522164e-06, "loss": 0.9347, "step": 130500 }, { "epoch": 0.78, "learning_rate": 4.411257214255965e-06, "loss": 1.0618, "step": 131000 }, { "epoch": 0.78, "learning_rate": 4.351758195989766e-06, "loss": 0.9826, "step": 131500 }, { "epoch": 0.79, "learning_rate": 4.292259177723568e-06, "loss": 1.0422, "step": 132000 }, { "epoch": 0.79, "learning_rate": 4.232760159457369e-06, "loss": 0.9928, "step": 132500 }, { "epoch": 0.79, "learning_rate": 4.17326114119117e-06, "loss": 1.0487, "step": 133000 }, { "epoch": 0.79, "learning_rate": 4.113762122924972e-06, "loss": 1.0379, "step": 133500 }, { "epoch": 0.8, "learning_rate": 4.054263104658773e-06, "loss": 1.0225, "step": 134000 }, { "epoch": 0.8, "learning_rate": 3.994764086392575e-06, "loss": 0.951, "step": 134500 }, { "epoch": 0.8, "learning_rate": 3.935265068126376e-06, "loss": 1.0393, "step": 135000 }, { "epoch": 0.81, "learning_rate": 3.875766049860177e-06, "loss": 0.9851, "step": 135500 }, { "epoch": 0.81, "learning_rate": 3.816267031593979e-06, "loss": 0.9716, "step": 136000 }, { "epoch": 0.81, "learning_rate": 3.7567680133277807e-06, "loss": 0.9974, "step": 136500 }, { "epoch": 0.82, "learning_rate": 3.697268995061582e-06, "loss": 1.0052, "step": 137000 }, { "epoch": 0.82, "learning_rate": 3.6377699767953835e-06, "loss": 0.9949, "step": 137500 }, { "epoch": 0.82, "learning_rate": 3.5782709585291847e-06, "loss": 1.0279, "step": 138000 }, { "epoch": 0.82, "learning_rate": 3.518771940262986e-06, "loss": 0.9636, "step": 138500 }, { "epoch": 0.83, "learning_rate": 3.4592729219967875e-06, "loss": 1.0322, "step": 139000 }, { "epoch": 0.83, "learning_rate": 3.3997739037305887e-06, "loss": 1.0153, "step": 139500 }, { "epoch": 0.83, "learning_rate": 3.3402748854643903e-06, "loss": 0.9982, "step": 140000 }, { "epoch": 0.84, "learning_rate": 3.2807758671981915e-06, "loss": 1.022, "step": 140500 }, { "epoch": 0.84, "learning_rate": 3.2212768489319927e-06, "loss": 1.0208, "step": 141000 }, { "epoch": 0.84, "learning_rate": 3.1617778306657944e-06, "loss": 0.9761, "step": 141500 }, { "epoch": 0.84, "learning_rate": 3.1022788123995956e-06, "loss": 1.0125, "step": 142000 }, { "epoch": 0.85, "learning_rate": 3.0427797941333968e-06, "loss": 1.0531, "step": 142500 }, { "epoch": 0.85, "learning_rate": 2.9832807758671984e-06, "loss": 1.0232, "step": 143000 }, { "epoch": 0.85, "learning_rate": 2.9237817576009996e-06, "loss": 1.067, "step": 143500 }, { "epoch": 0.86, "learning_rate": 2.8642827393348012e-06, "loss": 0.9986, "step": 144000 }, { "epoch": 0.86, "learning_rate": 2.8047837210686024e-06, "loss": 1.0009, "step": 144500 }, { "epoch": 0.86, "learning_rate": 2.7452847028024036e-06, "loss": 1.0113, "step": 145000 }, { "epoch": 0.87, "learning_rate": 2.6857856845362052e-06, "loss": 0.9991, "step": 145500 }, { "epoch": 0.87, "learning_rate": 2.6262866662700064e-06, "loss": 1.0153, "step": 146000 }, { "epoch": 0.87, "learning_rate": 2.5667876480038085e-06, "loss": 1.0382, "step": 146500 }, { "epoch": 0.87, "learning_rate": 2.5072886297376097e-06, "loss": 0.993, "step": 147000 }, { "epoch": 0.88, "learning_rate": 2.447789611471411e-06, "loss": 0.9704, "step": 147500 }, { "epoch": 0.88, "learning_rate": 2.388290593205212e-06, "loss": 0.9935, "step": 148000 }, { "epoch": 0.88, "learning_rate": 2.3287915749390137e-06, "loss": 1.0239, "step": 148500 }, { "epoch": 0.89, "learning_rate": 2.269292556672815e-06, "loss": 0.9927, "step": 149000 }, { "epoch": 0.89, "learning_rate": 2.2097935384066165e-06, "loss": 1.0262, "step": 149500 }, { "epoch": 0.89, "learning_rate": 2.150294520140418e-06, "loss": 0.9762, "step": 150000 }, { "epoch": 0.9, "learning_rate": 2.0907955018742194e-06, "loss": 1.0298, "step": 150500 }, { "epoch": 0.9, "learning_rate": 2.0312964836080206e-06, "loss": 0.9869, "step": 151000 }, { "epoch": 0.9, "learning_rate": 1.971797465341822e-06, "loss": 1.0192, "step": 151500 }, { "epoch": 0.9, "learning_rate": 1.9122984470756234e-06, "loss": 1.0034, "step": 152000 }, { "epoch": 0.91, "learning_rate": 1.8527994288094248e-06, "loss": 1.0179, "step": 152500 }, { "epoch": 0.91, "learning_rate": 1.7933004105432262e-06, "loss": 1.0189, "step": 153000 }, { "epoch": 0.91, "learning_rate": 1.7338013922770274e-06, "loss": 0.9545, "step": 153500 }, { "epoch": 0.92, "learning_rate": 1.6743023740108288e-06, "loss": 1.0462, "step": 154000 }, { "epoch": 0.92, "learning_rate": 1.6148033557446302e-06, "loss": 1.0083, "step": 154500 }, { "epoch": 0.92, "learning_rate": 1.5553043374784319e-06, "loss": 0.9755, "step": 155000 }, { "epoch": 0.93, "learning_rate": 1.4958053192122333e-06, "loss": 1.0234, "step": 155500 }, { "epoch": 0.93, "learning_rate": 1.4363063009460345e-06, "loss": 1.0116, "step": 156000 }, { "epoch": 0.93, "learning_rate": 1.3768072826798359e-06, "loss": 1.0502, "step": 156500 }, { "epoch": 0.93, "learning_rate": 1.3173082644136373e-06, "loss": 0.9593, "step": 157000 }, { "epoch": 0.94, "learning_rate": 1.2578092461474387e-06, "loss": 1.0195, "step": 157500 }, { "epoch": 0.94, "learning_rate": 1.19831022788124e-06, "loss": 1.0212, "step": 158000 }, { "epoch": 0.94, "learning_rate": 1.1388112096150413e-06, "loss": 0.9523, "step": 158500 }, { "epoch": 0.95, "learning_rate": 1.079312191348843e-06, "loss": 0.9902, "step": 159000 }, { "epoch": 0.95, "learning_rate": 1.0198131730826442e-06, "loss": 1.0248, "step": 159500 }, { "epoch": 0.95, "learning_rate": 9.603141548164456e-07, "loss": 1.0062, "step": 160000 }, { "epoch": 0.95, "learning_rate": 9.00815136550247e-07, "loss": 1.0035, "step": 160500 }, { "epoch": 0.96, "learning_rate": 8.413161182840483e-07, "loss": 0.9755, "step": 161000 }, { "epoch": 0.96, "learning_rate": 7.818171000178498e-07, "loss": 1.0131, "step": 161500 }, { "epoch": 0.96, "learning_rate": 7.223180817516512e-07, "loss": 1.0353, "step": 162000 }, { "epoch": 0.97, "learning_rate": 6.628190634854525e-07, "loss": 1.0138, "step": 162500 }, { "epoch": 0.97, "learning_rate": 6.033200452192539e-07, "loss": 0.9738, "step": 163000 }, { "epoch": 0.97, "learning_rate": 5.438210269530553e-07, "loss": 0.9878, "step": 163500 }, { "epoch": 0.98, "learning_rate": 4.843220086868568e-07, "loss": 0.9787, "step": 164000 }, { "epoch": 0.98, "learning_rate": 4.2482299042065806e-07, "loss": 0.9903, "step": 164500 }, { "epoch": 0.98, "learning_rate": 3.653239721544595e-07, "loss": 1.032, "step": 165000 }, { "epoch": 0.98, "learning_rate": 3.0582495388826084e-07, "loss": 1.0436, "step": 165500 }, { "epoch": 0.99, "learning_rate": 2.4632593562206225e-07, "loss": 0.9956, "step": 166000 }, { "epoch": 0.99, "learning_rate": 1.8682691735586366e-07, "loss": 1.0102, "step": 166500 }, { "epoch": 0.99, "learning_rate": 1.2732789908966505e-07, "loss": 0.9634, "step": 167000 }, { "epoch": 1.0, "learning_rate": 6.782888082346642e-08, "loss": 1.004, "step": 167500 }, { "epoch": 1.0, "learning_rate": 8.329862557267807e-09, "loss": 0.9777, "step": 168000 } ], "max_steps": 168070, "num_train_epochs": 1, "total_flos": 6437535729057792.0, "trial_name": null, "trial_params": null }