{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.948867786705623, "global_step": 164000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 5e-05, "loss": 7.0207, "step": 1000 }, { "epoch": 0.37, "learning_rate": 4.96937775600196e-05, "loss": 5.1703, "step": 2000 }, { "epoch": 0.55, "learning_rate": 4.93875551200392e-05, "loss": 4.7048, "step": 3000 }, { "epoch": 0.73, "learning_rate": 4.9081332680058794e-05, "loss": 4.3894, "step": 4000 }, { "epoch": 0.91, "learning_rate": 4.87751102400784e-05, "loss": 4.1348, "step": 5000 }, { "epoch": 1.1, "learning_rate": 4.8468887800097995e-05, "loss": 3.9447, "step": 6000 }, { "epoch": 1.28, "learning_rate": 4.816266536011759e-05, "loss": 3.8286, "step": 7000 }, { "epoch": 1.46, "learning_rate": 4.785644292013719e-05, "loss": 3.7462, "step": 8000 }, { "epoch": 1.64, "learning_rate": 4.7550220480156786e-05, "loss": 3.693, "step": 9000 }, { "epoch": 1.83, "learning_rate": 4.7243998040176384e-05, "loss": 3.6311, "step": 10000 }, { "epoch": 2.01, "learning_rate": 4.693777560019598e-05, "loss": 3.5843, "step": 11000 }, { "epoch": 2.19, "learning_rate": 4.6631553160215585e-05, "loss": 3.4624, "step": 12000 }, { "epoch": 2.37, "learning_rate": 4.632533072023518e-05, "loss": 3.4432, "step": 13000 }, { "epoch": 2.56, "learning_rate": 4.601910828025478e-05, "loss": 3.423, "step": 14000 }, { "epoch": 2.74, "learning_rate": 4.5712885840274376e-05, "loss": 3.4042, "step": 15000 }, { "epoch": 2.92, "learning_rate": 4.540666340029397e-05, "loss": 3.3902, "step": 16000 }, { "epoch": 3.1, "learning_rate": 4.510044096031357e-05, "loss": 3.2973, "step": 17000 }, { "epoch": 3.29, "learning_rate": 4.479421852033317e-05, "loss": 3.2482, "step": 18000 }, { "epoch": 3.47, "learning_rate": 4.448799608035277e-05, "loss": 3.2427, "step": 19000 }, { "epoch": 3.65, "learning_rate": 4.418177364037237e-05, "loss": 3.2309, "step": 20000 }, { "epoch": 3.83, "learning_rate": 4.3875551200391966e-05, "loss": 3.2193, "step": 21000 }, { "epoch": 4.02, "learning_rate": 4.356932876041156e-05, "loss": 3.2065, "step": 22000 }, { "epoch": 4.2, "learning_rate": 4.326310632043116e-05, "loss": 3.0733, "step": 23000 }, { "epoch": 4.38, "learning_rate": 4.295688388045076e-05, "loss": 3.0868, "step": 24000 }, { "epoch": 4.57, "learning_rate": 4.2650661440470355e-05, "loss": 3.0884, "step": 25000 }, { "epoch": 4.75, "learning_rate": 4.234443900048996e-05, "loss": 3.0843, "step": 26000 }, { "epoch": 4.93, "learning_rate": 4.2038216560509556e-05, "loss": 3.0768, "step": 27000 }, { "epoch": 5.11, "learning_rate": 4.173199412052915e-05, "loss": 2.9834, "step": 28000 }, { "epoch": 5.3, "learning_rate": 4.142577168054875e-05, "loss": 2.9267, "step": 29000 }, { "epoch": 5.48, "learning_rate": 4.111954924056835e-05, "loss": 2.9463, "step": 30000 }, { "epoch": 5.66, "learning_rate": 4.081332680058795e-05, "loss": 2.9523, "step": 31000 }, { "epoch": 5.84, "learning_rate": 4.050710436060755e-05, "loss": 2.9561, "step": 32000 }, { "epoch": 6.03, "learning_rate": 4.0200881920627145e-05, "loss": 2.9237, "step": 33000 }, { "epoch": 6.21, "learning_rate": 3.989465948064674e-05, "loss": 2.7803, "step": 34000 }, { "epoch": 6.39, "learning_rate": 3.958843704066634e-05, "loss": 2.8068, "step": 35000 }, { "epoch": 6.57, "learning_rate": 3.9282214600685944e-05, "loss": 2.8223, "step": 36000 }, { "epoch": 6.76, "learning_rate": 3.897599216070554e-05, "loss": 2.8238, "step": 37000 }, { "epoch": 6.94, "learning_rate": 3.866976972072514e-05, "loss": 2.8341, "step": 38000 }, { "epoch": 7.12, "learning_rate": 3.8363547280744735e-05, "loss": 2.6976, "step": 39000 }, { "epoch": 7.3, "learning_rate": 3.805732484076434e-05, "loss": 2.6679, "step": 40000 }, { "epoch": 7.49, "learning_rate": 3.7751102400783936e-05, "loss": 2.691, "step": 41000 }, { "epoch": 7.67, "learning_rate": 3.744487996080353e-05, "loss": 2.7001, "step": 42000 }, { "epoch": 7.85, "learning_rate": 3.713865752082313e-05, "loss": 2.7189, "step": 43000 }, { "epoch": 8.04, "learning_rate": 3.683243508084273e-05, "loss": 2.6829, "step": 44000 }, { "epoch": 8.22, "learning_rate": 3.6526212640862325e-05, "loss": 2.5262, "step": 45000 }, { "epoch": 8.4, "learning_rate": 3.621999020088192e-05, "loss": 2.5547, "step": 46000 }, { "epoch": 8.58, "learning_rate": 3.5913767760901526e-05, "loss": 2.5926, "step": 47000 }, { "epoch": 8.77, "learning_rate": 3.560754532092112e-05, "loss": 2.5971, "step": 48000 }, { "epoch": 8.95, "learning_rate": 3.530132288094072e-05, "loss": 2.6053, "step": 49000 }, { "epoch": 9.13, "learning_rate": 3.499510044096032e-05, "loss": 2.4615, "step": 50000 }, { "epoch": 9.31, "learning_rate": 3.4688878000979914e-05, "loss": 2.4412, "step": 51000 }, { "epoch": 9.5, "learning_rate": 3.438265556099951e-05, "loss": 2.4633, "step": 52000 }, { "epoch": 9.68, "learning_rate": 3.407643312101911e-05, "loss": 2.469, "step": 53000 }, { "epoch": 9.86, "learning_rate": 3.377021068103871e-05, "loss": 2.5006, "step": 54000 }, { "epoch": 10.04, "learning_rate": 3.346398824105831e-05, "loss": 2.4556, "step": 55000 }, { "epoch": 10.23, "learning_rate": 3.315776580107791e-05, "loss": 2.3134, "step": 56000 }, { "epoch": 10.41, "learning_rate": 3.2851543361097504e-05, "loss": 2.3445, "step": 57000 }, { "epoch": 10.59, "learning_rate": 3.25453209211171e-05, "loss": 2.3709, "step": 58000 }, { "epoch": 10.77, "learning_rate": 3.22390984811367e-05, "loss": 2.3831, "step": 59000 }, { "epoch": 10.96, "learning_rate": 3.1932876041156296e-05, "loss": 2.4089, "step": 60000 }, { "epoch": 11.14, "learning_rate": 3.162665360117589e-05, "loss": 2.248, "step": 61000 }, { "epoch": 11.32, "learning_rate": 3.13204311611955e-05, "loss": 2.2344, "step": 62000 }, { "epoch": 11.5, "learning_rate": 3.1014208721215094e-05, "loss": 2.2553, "step": 63000 }, { "epoch": 11.69, "learning_rate": 3.070798628123469e-05, "loss": 2.2816, "step": 64000 }, { "epoch": 11.87, "learning_rate": 3.0401763841254288e-05, "loss": 2.3067, "step": 65000 }, { "epoch": 12.05, "learning_rate": 3.0095541401273885e-05, "loss": 2.248, "step": 66000 }, { "epoch": 12.24, "learning_rate": 2.9789318961293483e-05, "loss": 2.1257, "step": 67000 }, { "epoch": 12.42, "learning_rate": 2.948309652131308e-05, "loss": 2.1559, "step": 68000 }, { "epoch": 12.6, "learning_rate": 2.9176874081332684e-05, "loss": 2.1743, "step": 69000 }, { "epoch": 12.78, "learning_rate": 2.887065164135228e-05, "loss": 2.1997, "step": 70000 }, { "epoch": 12.97, "learning_rate": 2.8564429201371878e-05, "loss": 2.2321, "step": 71000 }, { "epoch": 13.15, "learning_rate": 2.8258206761391475e-05, "loss": 2.0603, "step": 72000 }, { "epoch": 13.33, "learning_rate": 2.7951984321411072e-05, "loss": 2.0491, "step": 73000 }, { "epoch": 13.51, "learning_rate": 2.764576188143067e-05, "loss": 2.079, "step": 74000 }, { "epoch": 13.7, "learning_rate": 2.7339539441450267e-05, "loss": 2.1119, "step": 75000 }, { "epoch": 13.88, "learning_rate": 2.703331700146987e-05, "loss": 2.1355, "step": 76000 }, { "epoch": 14.06, "learning_rate": 2.6727094561489468e-05, "loss": 2.068, "step": 77000 }, { "epoch": 14.24, "learning_rate": 2.6420872121509065e-05, "loss": 1.96, "step": 78000 }, { "epoch": 14.43, "learning_rate": 2.6114649681528662e-05, "loss": 1.9827, "step": 79000 }, { "epoch": 14.61, "learning_rate": 2.5808427241548263e-05, "loss": 2.0163, "step": 80000 }, { "epoch": 14.79, "learning_rate": 2.550220480156786e-05, "loss": 2.0496, "step": 81000 }, { "epoch": 14.97, "learning_rate": 2.5195982361587457e-05, "loss": 2.0579, "step": 82000 }, { "epoch": 15.16, "learning_rate": 2.4889759921607057e-05, "loss": 1.8884, "step": 83000 }, { "epoch": 15.34, "learning_rate": 2.4583537481626655e-05, "loss": 1.8992, "step": 84000 }, { "epoch": 15.52, "learning_rate": 2.4277315041646255e-05, "loss": 1.9383, "step": 85000 }, { "epoch": 15.7, "learning_rate": 2.3971092601665852e-05, "loss": 1.9512, "step": 86000 }, { "epoch": 15.89, "learning_rate": 2.366487016168545e-05, "loss": 1.9814, "step": 87000 }, { "epoch": 16.07, "learning_rate": 2.335864772170505e-05, "loss": 1.9075, "step": 88000 }, { "epoch": 16.25, "learning_rate": 2.3052425281724647e-05, "loss": 1.8141, "step": 89000 }, { "epoch": 16.44, "learning_rate": 2.2746202841744244e-05, "loss": 1.8431, "step": 90000 }, { "epoch": 16.62, "learning_rate": 2.243998040176384e-05, "loss": 1.8723, "step": 91000 }, { "epoch": 16.8, "learning_rate": 2.2133757961783442e-05, "loss": 1.9034, "step": 92000 }, { "epoch": 16.98, "learning_rate": 2.182753552180304e-05, "loss": 1.9112, "step": 93000 }, { "epoch": 17.17, "learning_rate": 2.1521313081822636e-05, "loss": 1.7565, "step": 94000 }, { "epoch": 17.35, "learning_rate": 2.1215090641842237e-05, "loss": 1.7656, "step": 95000 }, { "epoch": 17.53, "learning_rate": 2.0908868201861834e-05, "loss": 1.7856, "step": 96000 }, { "epoch": 17.71, "learning_rate": 2.060264576188143e-05, "loss": 1.8185, "step": 97000 }, { "epoch": 17.9, "learning_rate": 2.029642332190103e-05, "loss": 1.8397, "step": 98000 }, { "epoch": 18.08, "learning_rate": 1.999020088192063e-05, "loss": 1.7726, "step": 99000 }, { "epoch": 18.26, "learning_rate": 1.9683978441940226e-05, "loss": 1.6883, "step": 100000 }, { "epoch": 18.44, "learning_rate": 1.9377756001959823e-05, "loss": 1.724, "step": 101000 }, { "epoch": 18.63, "learning_rate": 1.9071533561979424e-05, "loss": 1.751, "step": 102000 }, { "epoch": 18.81, "learning_rate": 1.876531112199902e-05, "loss": 1.7654, "step": 103000 }, { "epoch": 18.99, "learning_rate": 1.8459088682018618e-05, "loss": 1.7807, "step": 104000 }, { "epoch": 19.17, "learning_rate": 1.8152866242038215e-05, "loss": 1.6287, "step": 105000 }, { "epoch": 19.36, "learning_rate": 1.7846643802057816e-05, "loss": 1.6529, "step": 106000 }, { "epoch": 19.54, "learning_rate": 1.7540421362077413e-05, "loss": 1.6782, "step": 107000 }, { "epoch": 19.72, "learning_rate": 1.723419892209701e-05, "loss": 1.6989, "step": 108000 }, { "epoch": 19.91, "learning_rate": 1.692797648211661e-05, "loss": 1.7147, "step": 109000 }, { "epoch": 20.09, "learning_rate": 1.6621754042136208e-05, "loss": 1.6426, "step": 110000 }, { "epoch": 20.27, "learning_rate": 1.6315531602155805e-05, "loss": 1.5852, "step": 111000 }, { "epoch": 20.45, "learning_rate": 1.6009309162175405e-05, "loss": 1.6156, "step": 112000 }, { "epoch": 20.64, "learning_rate": 1.5703086722195003e-05, "loss": 1.6307, "step": 113000 }, { "epoch": 20.82, "learning_rate": 1.53968642822146e-05, "loss": 1.6493, "step": 114000 }, { "epoch": 21.0, "learning_rate": 1.5090641842234199e-05, "loss": 1.6623, "step": 115000 }, { "epoch": 21.18, "learning_rate": 1.47844194022538e-05, "loss": 1.5248, "step": 116000 }, { "epoch": 21.37, "learning_rate": 1.4478196962273396e-05, "loss": 1.551, "step": 117000 }, { "epoch": 21.55, "learning_rate": 1.4171974522292993e-05, "loss": 1.5738, "step": 118000 }, { "epoch": 21.73, "learning_rate": 1.386575208231259e-05, "loss": 1.5854, "step": 119000 }, { "epoch": 21.91, "learning_rate": 1.3559529642332191e-05, "loss": 1.599, "step": 120000 }, { "epoch": 22.1, "learning_rate": 1.3253307202351788e-05, "loss": 1.5402, "step": 121000 }, { "epoch": 22.28, "learning_rate": 1.2947084762371387e-05, "loss": 1.486, "step": 122000 }, { "epoch": 22.46, "learning_rate": 1.2640862322390986e-05, "loss": 1.5188, "step": 123000 }, { "epoch": 22.64, "learning_rate": 1.2334639882410585e-05, "loss": 1.5442, "step": 124000 }, { "epoch": 22.83, "learning_rate": 1.2028417442430182e-05, "loss": 1.5483, "step": 125000 }, { "epoch": 23.01, "learning_rate": 1.1722195002449781e-05, "loss": 1.5501, "step": 126000 }, { "epoch": 23.19, "learning_rate": 1.1415972562469378e-05, "loss": 1.4471, "step": 127000 }, { "epoch": 23.37, "learning_rate": 1.1109750122488977e-05, "loss": 1.4578, "step": 128000 }, { "epoch": 23.56, "learning_rate": 1.0803527682508574e-05, "loss": 1.4712, "step": 129000 }, { "epoch": 23.74, "learning_rate": 1.0497305242528173e-05, "loss": 1.4918, "step": 130000 }, { "epoch": 23.92, "learning_rate": 1.0191082802547772e-05, "loss": 1.5129, "step": 131000 }, { "epoch": 24.11, "learning_rate": 9.884860362567369e-06, "loss": 1.4455, "step": 132000 }, { "epoch": 24.29, "learning_rate": 9.578637922586968e-06, "loss": 1.4141, "step": 133000 }, { "epoch": 24.47, "learning_rate": 9.272415482606565e-06, "loss": 1.4301, "step": 134000 }, { "epoch": 24.65, "learning_rate": 8.966193042626164e-06, "loss": 1.4454, "step": 135000 }, { "epoch": 24.84, "learning_rate": 8.659970602645761e-06, "loss": 1.4599, "step": 136000 }, { "epoch": 25.02, "learning_rate": 8.35374816266536e-06, "loss": 1.4573, "step": 137000 }, { "epoch": 25.2, "learning_rate": 8.047525722684959e-06, "loss": 1.3704, "step": 138000 }, { "epoch": 25.38, "learning_rate": 7.741303282704558e-06, "loss": 1.3845, "step": 139000 }, { "epoch": 25.57, "learning_rate": 7.4350808427241555e-06, "loss": 1.4078, "step": 140000 }, { "epoch": 25.75, "learning_rate": 7.128858402743753e-06, "loss": 1.4147, "step": 141000 }, { "epoch": 25.93, "learning_rate": 6.8226359627633515e-06, "loss": 1.4223, "step": 142000 }, { "epoch": 26.11, "learning_rate": 6.5164135227829495e-06, "loss": 1.3655, "step": 143000 }, { "epoch": 26.3, "learning_rate": 6.210191082802548e-06, "loss": 1.3528, "step": 144000 }, { "epoch": 26.48, "learning_rate": 5.903968642822146e-06, "loss": 1.3626, "step": 145000 }, { "epoch": 26.66, "learning_rate": 5.597746202841744e-06, "loss": 1.3719, "step": 146000 }, { "epoch": 26.84, "learning_rate": 5.291523762861342e-06, "loss": 1.3808, "step": 147000 }, { "epoch": 27.03, "learning_rate": 4.985301322880941e-06, "loss": 1.3752, "step": 148000 }, { "epoch": 27.21, "learning_rate": 4.679078882900539e-06, "loss": 1.3171, "step": 149000 }, { "epoch": 27.39, "learning_rate": 4.372856442920137e-06, "loss": 1.3321, "step": 150000 }, { "epoch": 27.57, "learning_rate": 4.066634002939736e-06, "loss": 1.3365, "step": 151000 }, { "epoch": 27.76, "learning_rate": 3.7604115629593337e-06, "loss": 1.3424, "step": 152000 }, { "epoch": 27.94, "learning_rate": 3.454189122978932e-06, "loss": 1.3471, "step": 153000 }, { "epoch": 28.12, "learning_rate": 3.14796668299853e-06, "loss": 1.3095, "step": 154000 }, { "epoch": 28.31, "learning_rate": 2.8417442430181285e-06, "loss": 1.3113, "step": 155000 }, { "epoch": 28.49, "learning_rate": 2.5355218030377265e-06, "loss": 1.3017, "step": 156000 }, { "epoch": 28.67, "learning_rate": 2.229299363057325e-06, "loss": 1.3147, "step": 157000 }, { "epoch": 28.85, "learning_rate": 1.9230769230769234e-06, "loss": 1.3135, "step": 158000 }, { "epoch": 29.04, "learning_rate": 1.6168544830965214e-06, "loss": 1.309, "step": 159000 }, { "epoch": 29.22, "learning_rate": 1.3106320431161196e-06, "loss": 1.2784, "step": 160000 }, { "epoch": 29.4, "learning_rate": 1.0044096031357178e-06, "loss": 1.2903, "step": 161000 }, { "epoch": 29.58, "learning_rate": 6.98187163155316e-07, "loss": 1.2919, "step": 162000 }, { "epoch": 29.77, "learning_rate": 3.9196472317491427e-07, "loss": 1.2802, "step": 163000 }, { "epoch": 29.95, "learning_rate": 8.57422831945125e-08, "loss": 1.2904, "step": 164000 } ], "max_steps": 164280, "num_train_epochs": 30, "total_flos": 3.45466660974336e+17, "trial_name": null, "trial_params": null }