{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 56860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 1.9824129440731624e-05, "loss": 0.5315, "step": 500 }, { "epoch": 0.18, "learning_rate": 1.9648258881463246e-05, "loss": 0.2968, "step": 1000 }, { "epoch": 0.26, "learning_rate": 1.9472388322194867e-05, "loss": 0.2756, "step": 1500 }, { "epoch": 0.35, "learning_rate": 1.929651776292649e-05, "loss": 0.2673, "step": 2000 }, { "epoch": 0.44, "learning_rate": 1.9120647203658108e-05, "loss": 0.2521, "step": 2500 }, { "epoch": 0.53, "learning_rate": 1.894477664438973e-05, "loss": 0.2405, "step": 3000 }, { "epoch": 0.62, "learning_rate": 1.8768906085121352e-05, "loss": 0.2264, "step": 3500 }, { "epoch": 0.7, "learning_rate": 1.8593035525852974e-05, "loss": 0.2264, "step": 4000 }, { "epoch": 0.79, "learning_rate": 1.8417164966584596e-05, "loss": 0.2231, "step": 4500 }, { "epoch": 0.88, "learning_rate": 1.8241294407316218e-05, "loss": 0.6134, "step": 5000 }, { "epoch": 0.97, "learning_rate": 1.806542384804784e-05, "loss": 0.6581, "step": 5500 }, { "epoch": 1.06, "learning_rate": 1.788955328877946e-05, "loss": 0.6592, "step": 6000 }, { "epoch": 1.14, "learning_rate": 1.771368272951108e-05, "loss": 0.6561, "step": 6500 }, { "epoch": 1.23, "learning_rate": 1.7537812170242702e-05, "loss": 0.6599, "step": 7000 }, { "epoch": 1.32, "learning_rate": 1.7361941610974324e-05, "loss": 0.6569, "step": 7500 }, { "epoch": 1.41, "learning_rate": 1.7186071051705946e-05, "loss": 0.6531, "step": 8000 }, { "epoch": 1.49, "learning_rate": 1.7010200492437568e-05, "loss": 0.2212, "step": 8500 }, { "epoch": 1.58, "learning_rate": 1.683432993316919e-05, "loss": 0.4708, "step": 9000 }, { "epoch": 1.67, "learning_rate": 1.665845937390081e-05, "loss": 0.2605, "step": 9500 }, { "epoch": 1.76, "learning_rate": 1.6482588814632434e-05, "loss": 0.1987, "step": 10000 }, { "epoch": 1.85, "learning_rate": 1.6306718255364052e-05, "loss": 0.1908, "step": 10500 }, { "epoch": 1.93, "learning_rate": 1.6130847696095674e-05, "loss": 0.185, "step": 11000 }, { "epoch": 2.02, "learning_rate": 1.5954977136827296e-05, "loss": 0.1767, "step": 11500 }, { "epoch": 2.11, "learning_rate": 1.5779106577558918e-05, "loss": 0.1551, "step": 12000 }, { "epoch": 2.2, "learning_rate": 1.560323601829054e-05, "loss": 0.1476, "step": 12500 }, { "epoch": 2.29, "learning_rate": 1.5427365459022162e-05, "loss": 0.1449, "step": 13000 }, { "epoch": 2.37, "learning_rate": 1.5251494899753782e-05, "loss": 0.1566, "step": 13500 }, { "epoch": 2.46, "learning_rate": 1.5075624340485404e-05, "loss": 0.1586, "step": 14000 }, { "epoch": 2.55, "learning_rate": 1.4899753781217026e-05, "loss": 0.1487, "step": 14500 }, { "epoch": 2.64, "learning_rate": 1.4723883221948648e-05, "loss": 0.1493, "step": 15000 }, { "epoch": 2.73, "learning_rate": 1.4548012662680268e-05, "loss": 0.1547, "step": 15500 }, { "epoch": 2.81, "learning_rate": 1.437214210341189e-05, "loss": 0.1528, "step": 16000 }, { "epoch": 2.9, "learning_rate": 1.4196271544143512e-05, "loss": 0.1447, "step": 16500 }, { "epoch": 2.99, "learning_rate": 1.4020400984875134e-05, "loss": 0.1562, "step": 17000 }, { "epoch": 3.08, "learning_rate": 1.3844530425606754e-05, "loss": 0.1022, "step": 17500 }, { "epoch": 3.17, "learning_rate": 1.3668659866338376e-05, "loss": 0.099, "step": 18000 }, { "epoch": 3.25, "learning_rate": 1.3492789307069998e-05, "loss": 0.0971, "step": 18500 }, { "epoch": 3.34, "learning_rate": 1.331691874780162e-05, "loss": 0.1046, "step": 19000 }, { "epoch": 3.43, "learning_rate": 1.314104818853324e-05, "loss": 0.1083, "step": 19500 }, { "epoch": 3.52, "learning_rate": 1.2965177629264862e-05, "loss": 0.0998, "step": 20000 }, { "epoch": 3.61, "learning_rate": 1.2789307069996484e-05, "loss": 0.1049, "step": 20500 }, { "epoch": 3.69, "learning_rate": 1.2613436510728106e-05, "loss": 0.1052, "step": 21000 }, { "epoch": 3.78, "learning_rate": 1.2437565951459726e-05, "loss": 0.108, "step": 21500 }, { "epoch": 3.87, "learning_rate": 1.2261695392191348e-05, "loss": 0.0991, "step": 22000 }, { "epoch": 3.96, "learning_rate": 1.208582483292297e-05, "loss": 0.1024, "step": 22500 }, { "epoch": 4.05, "learning_rate": 1.1909954273654592e-05, "loss": 0.0822, "step": 23000 }, { "epoch": 4.13, "learning_rate": 1.1734083714386212e-05, "loss": 0.069, "step": 23500 }, { "epoch": 4.22, "learning_rate": 1.1558213155117834e-05, "loss": 0.0686, "step": 24000 }, { "epoch": 4.31, "learning_rate": 1.1382342595849456e-05, "loss": 0.0709, "step": 24500 }, { "epoch": 4.4, "learning_rate": 1.1206472036581078e-05, "loss": 0.0747, "step": 25000 }, { "epoch": 4.48, "learning_rate": 1.1030601477312698e-05, "loss": 0.0658, "step": 25500 }, { "epoch": 4.57, "learning_rate": 1.085473091804432e-05, "loss": 0.0732, "step": 26000 }, { "epoch": 4.66, "learning_rate": 1.0678860358775942e-05, "loss": 0.0715, "step": 26500 }, { "epoch": 4.75, "learning_rate": 1.0502989799507564e-05, "loss": 0.0778, "step": 27000 }, { "epoch": 4.84, "learning_rate": 1.0327119240239184e-05, "loss": 0.0724, "step": 27500 }, { "epoch": 4.92, "learning_rate": 1.0151248680970806e-05, "loss": 0.0722, "step": 28000 }, { "epoch": 5.01, "learning_rate": 9.975378121702428e-06, "loss": 0.0691, "step": 28500 }, { "epoch": 5.1, "learning_rate": 9.79950756243405e-06, "loss": 0.0485, "step": 29000 }, { "epoch": 5.19, "learning_rate": 9.62363700316567e-06, "loss": 0.0503, "step": 29500 }, { "epoch": 5.28, "learning_rate": 9.447766443897292e-06, "loss": 0.0536, "step": 30000 }, { "epoch": 5.36, "learning_rate": 9.271895884628914e-06, "loss": 0.0503, "step": 30500 }, { "epoch": 5.45, "learning_rate": 9.096025325360536e-06, "loss": 0.0452, "step": 31000 }, { "epoch": 5.54, "learning_rate": 8.920154766092157e-06, "loss": 0.0473, "step": 31500 }, { "epoch": 5.63, "learning_rate": 8.744284206823778e-06, "loss": 0.0488, "step": 32000 }, { "epoch": 5.72, "learning_rate": 8.5684136475554e-06, "loss": 0.0507, "step": 32500 }, { "epoch": 5.8, "learning_rate": 8.392543088287022e-06, "loss": 0.0467, "step": 33000 }, { "epoch": 5.89, "learning_rate": 8.216672529018643e-06, "loss": 0.0556, "step": 33500 }, { "epoch": 5.98, "learning_rate": 8.040801969750265e-06, "loss": 0.0481, "step": 34000 }, { "epoch": 6.07, "learning_rate": 7.864931410481886e-06, "loss": 0.0367, "step": 34500 }, { "epoch": 6.16, "learning_rate": 7.689060851213508e-06, "loss": 0.0329, "step": 35000 }, { "epoch": 6.24, "learning_rate": 7.5131902919451295e-06, "loss": 0.0294, "step": 35500 }, { "epoch": 6.33, "learning_rate": 7.3373197326767506e-06, "loss": 0.032, "step": 36000 }, { "epoch": 6.42, "learning_rate": 7.1614491734083725e-06, "loss": 0.0331, "step": 36500 }, { "epoch": 6.51, "learning_rate": 6.985578614139994e-06, "loss": 0.0274, "step": 37000 }, { "epoch": 6.6, "learning_rate": 6.8097080548716155e-06, "loss": 0.0385, "step": 37500 }, { "epoch": 6.68, "learning_rate": 6.633837495603237e-06, "loss": 0.0367, "step": 38000 }, { "epoch": 6.77, "learning_rate": 6.4579669363348586e-06, "loss": 0.0447, "step": 38500 }, { "epoch": 6.86, "learning_rate": 6.28209637706648e-06, "loss": 0.0433, "step": 39000 }, { "epoch": 6.95, "learning_rate": 6.106225817798102e-06, "loss": 0.034, "step": 39500 }, { "epoch": 7.03, "learning_rate": 5.930355258529723e-06, "loss": 0.0311, "step": 40000 }, { "epoch": 7.12, "learning_rate": 5.754484699261345e-06, "loss": 0.0189, "step": 40500 }, { "epoch": 7.21, "learning_rate": 5.578614139992966e-06, "loss": 0.02, "step": 41000 }, { "epoch": 7.3, "learning_rate": 5.402743580724588e-06, "loss": 0.0218, "step": 41500 }, { "epoch": 7.39, "learning_rate": 5.226873021456209e-06, "loss": 0.0217, "step": 42000 }, { "epoch": 7.47, "learning_rate": 5.051002462187831e-06, "loss": 0.0282, "step": 42500 }, { "epoch": 7.56, "learning_rate": 4.875131902919452e-06, "loss": 0.0267, "step": 43000 }, { "epoch": 7.65, "learning_rate": 4.699261343651073e-06, "loss": 0.026, "step": 43500 }, { "epoch": 7.74, "learning_rate": 4.523390784382695e-06, "loss": 0.0217, "step": 44000 }, { "epoch": 7.83, "learning_rate": 4.347520225114316e-06, "loss": 0.0194, "step": 44500 }, { "epoch": 7.91, "learning_rate": 4.171649665845938e-06, "loss": 0.026, "step": 45000 }, { "epoch": 8.0, "learning_rate": 3.995779106577559e-06, "loss": 0.0274, "step": 45500 }, { "epoch": 8.09, "learning_rate": 3.819908547309181e-06, "loss": 0.0107, "step": 46000 }, { "epoch": 8.18, "learning_rate": 3.6440379880408023e-06, "loss": 0.0103, "step": 46500 }, { "epoch": 8.27, "learning_rate": 3.468167428772424e-06, "loss": 0.0172, "step": 47000 }, { "epoch": 8.35, "learning_rate": 3.2922968695040454e-06, "loss": 0.0129, "step": 47500 }, { "epoch": 8.44, "learning_rate": 3.116426310235667e-06, "loss": 0.017, "step": 48000 }, { "epoch": 8.53, "learning_rate": 2.9405557509672884e-06, "loss": 0.0204, "step": 48500 }, { "epoch": 8.62, "learning_rate": 2.76468519169891e-06, "loss": 0.0128, "step": 49000 }, { "epoch": 8.71, "learning_rate": 2.5888146324305314e-06, "loss": 0.0213, "step": 49500 }, { "epoch": 8.79, "learning_rate": 2.412944073162153e-06, "loss": 0.0125, "step": 50000 }, { "epoch": 8.88, "learning_rate": 2.2370735138937744e-06, "loss": 0.0128, "step": 50500 }, { "epoch": 8.97, "learning_rate": 2.061202954625396e-06, "loss": 0.0182, "step": 51000 }, { "epoch": 9.06, "learning_rate": 1.8853323953570175e-06, "loss": 0.0116, "step": 51500 }, { "epoch": 9.15, "learning_rate": 1.709461836088639e-06, "loss": 0.0081, "step": 52000 }, { "epoch": 9.23, "learning_rate": 1.5335912768202605e-06, "loss": 0.0084, "step": 52500 }, { "epoch": 9.32, "learning_rate": 1.357720717551882e-06, "loss": 0.0076, "step": 53000 }, { "epoch": 9.41, "learning_rate": 1.1818501582835035e-06, "loss": 0.0089, "step": 53500 }, { "epoch": 9.5, "learning_rate": 1.005979599015125e-06, "loss": 0.0067, "step": 54000 }, { "epoch": 9.58, "learning_rate": 8.301090397467465e-07, "loss": 0.0066, "step": 54500 }, { "epoch": 9.67, "learning_rate": 6.542384804783681e-07, "loss": 0.0097, "step": 55000 }, { "epoch": 9.76, "learning_rate": 4.783679212099895e-07, "loss": 0.0099, "step": 55500 }, { "epoch": 9.85, "learning_rate": 3.02497361941611e-07, "loss": 0.0059, "step": 56000 }, { "epoch": 9.94, "learning_rate": 1.2662680267323252e-07, "loss": 0.0075, "step": 56500 }, { "epoch": 10.0, "step": 56860, "total_flos": 9.365103496606515e+17, "train_runtime": 69350.928, "train_samples_per_second": 52.464, "train_steps_per_second": 0.82 } ], "max_steps": 56860, "num_train_epochs": 10, "total_flos": 9.365103496606515e+17, "trial_name": null, "trial_params": null }