{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 141057, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.982276668297213e-05, "loss": 2.4577, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.964553336594426e-05, "loss": 2.4076, "step": 1000 }, { "epoch": 0.03, "learning_rate": 4.94683000489164e-05, "loss": 2.3823, "step": 1500 }, { "epoch": 0.04, "learning_rate": 4.929106673188853e-05, "loss": 2.3764, "step": 2000 }, { "epoch": 0.05, "learning_rate": 4.911383341486066e-05, "loss": 2.3482, "step": 2500 }, { "epoch": 0.06, "learning_rate": 4.893660009783279e-05, "loss": 2.3238, "step": 3000 }, { "epoch": 0.07, "learning_rate": 4.8759366780804925e-05, "loss": 2.3344, "step": 3500 }, { "epoch": 0.09, "learning_rate": 4.8582133463777055e-05, "loss": 2.3137, "step": 4000 }, { "epoch": 0.1, "learning_rate": 4.840490014674919e-05, "loss": 2.3036, "step": 4500 }, { "epoch": 0.11, "learning_rate": 4.822766682972132e-05, "loss": 2.2799, "step": 5000 }, { "epoch": 0.12, "learning_rate": 4.805043351269346e-05, "loss": 2.283, "step": 5500 }, { "epoch": 0.13, "learning_rate": 4.787320019566559e-05, "loss": 2.271, "step": 6000 }, { "epoch": 0.14, "learning_rate": 4.769596687863772e-05, "loss": 2.267, "step": 6500 }, { "epoch": 0.15, "learning_rate": 4.751873356160985e-05, "loss": 2.2637, "step": 7000 }, { "epoch": 0.16, "learning_rate": 4.7341500244581984e-05, "loss": 2.2565, "step": 7500 }, { "epoch": 0.17, "learning_rate": 4.7164266927554114e-05, "loss": 2.2501, "step": 8000 }, { "epoch": 0.18, "learning_rate": 4.6987033610526244e-05, "loss": 2.2233, "step": 8500 }, { "epoch": 0.19, "learning_rate": 4.6809800293498373e-05, "loss": 2.2321, "step": 9000 }, { "epoch": 0.2, "learning_rate": 4.663256697647051e-05, "loss": 2.221, "step": 9500 }, { "epoch": 0.21, "learning_rate": 4.645533365944264e-05, "loss": 2.2343, "step": 10000 }, { "epoch": 0.22, "learning_rate": 4.627810034241477e-05, "loss": 2.2038, "step": 10500 }, { "epoch": 0.23, "learning_rate": 4.61008670253869e-05, "loss": 2.2013, "step": 11000 }, { "epoch": 0.24, "learning_rate": 4.592363370835903e-05, "loss": 2.2105, "step": 11500 }, { "epoch": 0.26, "learning_rate": 4.5746400391331166e-05, "loss": 2.1881, "step": 12000 }, { "epoch": 0.27, "learning_rate": 4.5569167074303296e-05, "loss": 2.1832, "step": 12500 }, { "epoch": 0.28, "learning_rate": 4.5391933757275426e-05, "loss": 2.1774, "step": 13000 }, { "epoch": 0.29, "learning_rate": 4.521470044024756e-05, "loss": 2.1714, "step": 13500 }, { "epoch": 0.3, "learning_rate": 4.503746712321969e-05, "loss": 2.1602, "step": 14000 }, { "epoch": 0.31, "learning_rate": 4.486023380619183e-05, "loss": 2.1583, "step": 14500 }, { "epoch": 0.32, "learning_rate": 4.468300048916396e-05, "loss": 2.1707, "step": 15000 }, { "epoch": 0.33, "learning_rate": 4.450576717213609e-05, "loss": 2.1689, "step": 15500 }, { "epoch": 0.34, "learning_rate": 4.4328533855108225e-05, "loss": 2.1479, "step": 16000 }, { "epoch": 0.35, "learning_rate": 4.4151300538080355e-05, "loss": 2.1554, "step": 16500 }, { "epoch": 0.36, "learning_rate": 4.3974067221052485e-05, "loss": 2.1309, "step": 17000 }, { "epoch": 0.37, "learning_rate": 4.3796833904024615e-05, "loss": 2.1328, "step": 17500 }, { "epoch": 0.38, "learning_rate": 4.361960058699675e-05, "loss": 2.1469, "step": 18000 }, { "epoch": 0.39, "learning_rate": 4.344236726996888e-05, "loss": 2.1316, "step": 18500 }, { "epoch": 0.4, "learning_rate": 4.326513395294101e-05, "loss": 2.1387, "step": 19000 }, { "epoch": 0.41, "learning_rate": 4.308790063591314e-05, "loss": 2.1143, "step": 19500 }, { "epoch": 0.43, "learning_rate": 4.291066731888528e-05, "loss": 2.1301, "step": 20000 }, { "epoch": 0.44, "learning_rate": 4.273343400185741e-05, "loss": 2.1275, "step": 20500 }, { "epoch": 0.45, "learning_rate": 4.255620068482954e-05, "loss": 2.1172, "step": 21000 }, { "epoch": 0.46, "learning_rate": 4.237896736780167e-05, "loss": 2.118, "step": 21500 }, { "epoch": 0.47, "learning_rate": 4.2201734050773804e-05, "loss": 2.1287, "step": 22000 }, { "epoch": 0.48, "learning_rate": 4.202450073374593e-05, "loss": 2.1148, "step": 22500 }, { "epoch": 0.49, "learning_rate": 4.184726741671807e-05, "loss": 2.1019, "step": 23000 }, { "epoch": 0.5, "learning_rate": 4.16700340996902e-05, "loss": 2.1088, "step": 23500 }, { "epoch": 0.51, "learning_rate": 4.149280078266233e-05, "loss": 2.1092, "step": 24000 }, { "epoch": 0.52, "learning_rate": 4.1315567465634466e-05, "loss": 2.0987, "step": 24500 }, { "epoch": 0.53, "learning_rate": 4.1138334148606596e-05, "loss": 2.0778, "step": 25000 }, { "epoch": 0.54, "learning_rate": 4.0961100831578726e-05, "loss": 2.0963, "step": 25500 }, { "epoch": 0.55, "learning_rate": 4.0783867514550856e-05, "loss": 2.0868, "step": 26000 }, { "epoch": 0.56, "learning_rate": 4.060663419752299e-05, "loss": 2.0881, "step": 26500 }, { "epoch": 0.57, "learning_rate": 4.042940088049512e-05, "loss": 2.0887, "step": 27000 }, { "epoch": 0.58, "learning_rate": 4.025216756346725e-05, "loss": 2.0921, "step": 27500 }, { "epoch": 0.6, "learning_rate": 4.007493424643938e-05, "loss": 2.0782, "step": 28000 }, { "epoch": 0.61, "learning_rate": 3.989770092941152e-05, "loss": 2.0626, "step": 28500 }, { "epoch": 0.62, "learning_rate": 3.972046761238365e-05, "loss": 2.077, "step": 29000 }, { "epoch": 0.63, "learning_rate": 3.954323429535578e-05, "loss": 2.0728, "step": 29500 }, { "epoch": 0.64, "learning_rate": 3.936600097832791e-05, "loss": 2.0528, "step": 30000 }, { "epoch": 0.65, "learning_rate": 3.918876766130004e-05, "loss": 2.0661, "step": 30500 }, { "epoch": 0.66, "learning_rate": 3.9011534344272175e-05, "loss": 2.0639, "step": 31000 }, { "epoch": 0.67, "learning_rate": 3.8834301027244304e-05, "loss": 2.063, "step": 31500 }, { "epoch": 0.68, "learning_rate": 3.865706771021644e-05, "loss": 2.0445, "step": 32000 }, { "epoch": 0.69, "learning_rate": 3.847983439318857e-05, "loss": 2.0514, "step": 32500 }, { "epoch": 0.7, "learning_rate": 3.830260107616071e-05, "loss": 2.0538, "step": 33000 }, { "epoch": 0.71, "learning_rate": 3.812536775913284e-05, "loss": 2.041, "step": 33500 }, { "epoch": 0.72, "learning_rate": 3.794813444210497e-05, "loss": 2.0441, "step": 34000 }, { "epoch": 0.73, "learning_rate": 3.77709011250771e-05, "loss": 2.044, "step": 34500 }, { "epoch": 0.74, "learning_rate": 3.7593667808049234e-05, "loss": 2.0449, "step": 35000 }, { "epoch": 0.76, "learning_rate": 3.7416434491021363e-05, "loss": 2.0381, "step": 35500 }, { "epoch": 0.77, "learning_rate": 3.723920117399349e-05, "loss": 2.0302, "step": 36000 }, { "epoch": 0.78, "learning_rate": 3.706196785696562e-05, "loss": 2.028, "step": 36500 }, { "epoch": 0.79, "learning_rate": 3.688473453993776e-05, "loss": 2.0335, "step": 37000 }, { "epoch": 0.8, "learning_rate": 3.670750122290989e-05, "loss": 2.0289, "step": 37500 }, { "epoch": 0.81, "learning_rate": 3.653026790588202e-05, "loss": 2.0072, "step": 38000 }, { "epoch": 0.82, "learning_rate": 3.635303458885415e-05, "loss": 2.0341, "step": 38500 }, { "epoch": 0.83, "learning_rate": 3.6175801271826286e-05, "loss": 2.0233, "step": 39000 }, { "epoch": 0.84, "learning_rate": 3.5998567954798416e-05, "loss": 2.0146, "step": 39500 }, { "epoch": 0.85, "learning_rate": 3.5821334637770546e-05, "loss": 2.0205, "step": 40000 }, { "epoch": 0.86, "learning_rate": 3.5644101320742675e-05, "loss": 2.009, "step": 40500 }, { "epoch": 0.87, "learning_rate": 3.546686800371481e-05, "loss": 2.0023, "step": 41000 }, { "epoch": 0.88, "learning_rate": 3.528963468668695e-05, "loss": 2.0102, "step": 41500 }, { "epoch": 0.89, "learning_rate": 3.511240136965908e-05, "loss": 2.0074, "step": 42000 }, { "epoch": 0.9, "learning_rate": 3.493516805263121e-05, "loss": 1.9968, "step": 42500 }, { "epoch": 0.91, "learning_rate": 3.475793473560334e-05, "loss": 1.9968, "step": 43000 }, { "epoch": 0.93, "learning_rate": 3.4580701418575475e-05, "loss": 2.0042, "step": 43500 }, { "epoch": 0.94, "learning_rate": 3.4403468101547605e-05, "loss": 1.9947, "step": 44000 }, { "epoch": 0.95, "learning_rate": 3.4226234784519735e-05, "loss": 1.9995, "step": 44500 }, { "epoch": 0.96, "learning_rate": 3.4049001467491864e-05, "loss": 1.9929, "step": 45000 }, { "epoch": 0.97, "learning_rate": 3.3871768150464e-05, "loss": 1.9935, "step": 45500 }, { "epoch": 0.98, "learning_rate": 3.369453483343613e-05, "loss": 1.9931, "step": 46000 }, { "epoch": 0.99, "learning_rate": 3.351730151640826e-05, "loss": 1.9944, "step": 46500 }, { "epoch": 1.0, "learning_rate": 3.334006819938039e-05, "loss": 1.9779, "step": 47000 }, { "epoch": 1.01, "learning_rate": 3.316283488235253e-05, "loss": 1.9819, "step": 47500 }, { "epoch": 1.02, "learning_rate": 3.298560156532466e-05, "loss": 1.9798, "step": 48000 }, { "epoch": 1.03, "learning_rate": 3.280836824829679e-05, "loss": 1.9767, "step": 48500 }, { "epoch": 1.04, "learning_rate": 3.263113493126892e-05, "loss": 1.963, "step": 49000 }, { "epoch": 1.05, "learning_rate": 3.245390161424105e-05, "loss": 1.9686, "step": 49500 }, { "epoch": 1.06, "learning_rate": 3.227666829721318e-05, "loss": 1.9529, "step": 50000 }, { "epoch": 1.07, "learning_rate": 3.209943498018532e-05, "loss": 1.96, "step": 50500 }, { "epoch": 1.08, "learning_rate": 3.192220166315745e-05, "loss": 1.9683, "step": 51000 }, { "epoch": 1.1, "learning_rate": 3.174496834612958e-05, "loss": 1.9574, "step": 51500 }, { "epoch": 1.11, "learning_rate": 3.1567735029101716e-05, "loss": 1.9573, "step": 52000 }, { "epoch": 1.12, "learning_rate": 3.1390501712073846e-05, "loss": 1.9582, "step": 52500 }, { "epoch": 1.13, "learning_rate": 3.1213268395045976e-05, "loss": 1.9559, "step": 53000 }, { "epoch": 1.14, "learning_rate": 3.1036035078018106e-05, "loss": 1.9464, "step": 53500 }, { "epoch": 1.15, "learning_rate": 3.085880176099024e-05, "loss": 1.9512, "step": 54000 }, { "epoch": 1.16, "learning_rate": 3.068156844396237e-05, "loss": 1.9676, "step": 54500 }, { "epoch": 1.17, "learning_rate": 3.0504335126934502e-05, "loss": 1.946, "step": 55000 }, { "epoch": 1.18, "learning_rate": 3.0327101809906632e-05, "loss": 1.9586, "step": 55500 }, { "epoch": 1.19, "learning_rate": 3.014986849287877e-05, "loss": 1.9553, "step": 56000 }, { "epoch": 1.2, "learning_rate": 2.9972635175850898e-05, "loss": 1.9424, "step": 56500 }, { "epoch": 1.21, "learning_rate": 2.979540185882303e-05, "loss": 1.9431, "step": 57000 }, { "epoch": 1.22, "learning_rate": 2.961816854179516e-05, "loss": 1.9562, "step": 57500 }, { "epoch": 1.23, "learning_rate": 2.9440935224767298e-05, "loss": 1.9376, "step": 58000 }, { "epoch": 1.24, "learning_rate": 2.9263701907739428e-05, "loss": 1.9313, "step": 58500 }, { "epoch": 1.25, "learning_rate": 2.9086468590711558e-05, "loss": 1.9492, "step": 59000 }, { "epoch": 1.27, "learning_rate": 2.8909235273683687e-05, "loss": 1.9396, "step": 59500 }, { "epoch": 1.28, "learning_rate": 2.8732001956655824e-05, "loss": 1.9343, "step": 60000 }, { "epoch": 1.29, "learning_rate": 2.8554768639627954e-05, "loss": 1.9446, "step": 60500 }, { "epoch": 1.3, "learning_rate": 2.8377535322600084e-05, "loss": 1.9325, "step": 61000 }, { "epoch": 1.31, "learning_rate": 2.8200302005572217e-05, "loss": 1.9321, "step": 61500 }, { "epoch": 1.32, "learning_rate": 2.8023068688544347e-05, "loss": 1.9295, "step": 62000 }, { "epoch": 1.33, "learning_rate": 2.7845835371516483e-05, "loss": 1.928, "step": 62500 }, { "epoch": 1.34, "learning_rate": 2.7668602054488613e-05, "loss": 1.939, "step": 63000 }, { "epoch": 1.35, "learning_rate": 2.7491368737460743e-05, "loss": 1.9172, "step": 63500 }, { "epoch": 1.36, "learning_rate": 2.7314135420432873e-05, "loss": 1.9185, "step": 64000 }, { "epoch": 1.37, "learning_rate": 2.713690210340501e-05, "loss": 1.9299, "step": 64500 }, { "epoch": 1.38, "learning_rate": 2.695966878637714e-05, "loss": 1.9301, "step": 65000 }, { "epoch": 1.39, "learning_rate": 2.678243546934927e-05, "loss": 1.9189, "step": 65500 }, { "epoch": 1.4, "learning_rate": 2.6605202152321402e-05, "loss": 1.9303, "step": 66000 }, { "epoch": 1.41, "learning_rate": 2.6427968835293536e-05, "loss": 1.9151, "step": 66500 }, { "epoch": 1.42, "learning_rate": 2.625073551826567e-05, "loss": 1.9236, "step": 67000 }, { "epoch": 1.44, "learning_rate": 2.60735022012378e-05, "loss": 1.9198, "step": 67500 }, { "epoch": 1.45, "learning_rate": 2.589626888420993e-05, "loss": 1.91, "step": 68000 }, { "epoch": 1.46, "learning_rate": 2.5719035567182065e-05, "loss": 1.8999, "step": 68500 }, { "epoch": 1.47, "learning_rate": 2.5541802250154195e-05, "loss": 1.9141, "step": 69000 }, { "epoch": 1.48, "learning_rate": 2.5364568933126325e-05, "loss": 1.9012, "step": 69500 }, { "epoch": 1.49, "learning_rate": 2.5187335616098455e-05, "loss": 1.8999, "step": 70000 }, { "epoch": 1.5, "learning_rate": 2.501010229907059e-05, "loss": 1.8923, "step": 70500 }, { "epoch": 1.51, "learning_rate": 2.483286898204272e-05, "loss": 1.9068, "step": 71000 }, { "epoch": 1.52, "learning_rate": 2.4655635665014854e-05, "loss": 1.9013, "step": 71500 }, { "epoch": 1.53, "learning_rate": 2.4478402347986984e-05, "loss": 1.9018, "step": 72000 }, { "epoch": 1.54, "learning_rate": 2.4301169030959117e-05, "loss": 1.8892, "step": 72500 }, { "epoch": 1.55, "learning_rate": 2.4123935713931247e-05, "loss": 1.9016, "step": 73000 }, { "epoch": 1.56, "learning_rate": 2.394670239690338e-05, "loss": 1.9058, "step": 73500 }, { "epoch": 1.57, "learning_rate": 2.376946907987551e-05, "loss": 1.91, "step": 74000 }, { "epoch": 1.58, "learning_rate": 2.3592235762847644e-05, "loss": 1.889, "step": 74500 }, { "epoch": 1.6, "learning_rate": 2.3415002445819777e-05, "loss": 1.8988, "step": 75000 }, { "epoch": 1.61, "learning_rate": 2.323776912879191e-05, "loss": 1.8957, "step": 75500 }, { "epoch": 1.62, "learning_rate": 2.306053581176404e-05, "loss": 1.8938, "step": 76000 }, { "epoch": 1.63, "learning_rate": 2.2883302494736173e-05, "loss": 1.8979, "step": 76500 }, { "epoch": 1.64, "learning_rate": 2.2706069177708303e-05, "loss": 1.8898, "step": 77000 }, { "epoch": 1.65, "learning_rate": 2.2528835860680436e-05, "loss": 1.891, "step": 77500 }, { "epoch": 1.66, "learning_rate": 2.2351602543652566e-05, "loss": 1.8858, "step": 78000 }, { "epoch": 1.67, "learning_rate": 2.21743692266247e-05, "loss": 1.8749, "step": 78500 }, { "epoch": 1.68, "learning_rate": 2.199713590959683e-05, "loss": 1.8833, "step": 79000 }, { "epoch": 1.69, "learning_rate": 2.1819902592568962e-05, "loss": 1.8788, "step": 79500 }, { "epoch": 1.7, "learning_rate": 2.1642669275541096e-05, "loss": 1.8809, "step": 80000 }, { "epoch": 1.71, "learning_rate": 2.146543595851323e-05, "loss": 1.8796, "step": 80500 }, { "epoch": 1.72, "learning_rate": 2.128820264148536e-05, "loss": 1.8863, "step": 81000 }, { "epoch": 1.73, "learning_rate": 2.111096932445749e-05, "loss": 1.8814, "step": 81500 }, { "epoch": 1.74, "learning_rate": 2.0933736007429622e-05, "loss": 1.8814, "step": 82000 }, { "epoch": 1.75, "learning_rate": 2.075650269040175e-05, "loss": 1.8654, "step": 82500 }, { "epoch": 1.77, "learning_rate": 2.0579269373373885e-05, "loss": 1.8806, "step": 83000 }, { "epoch": 1.78, "learning_rate": 2.0402036056346015e-05, "loss": 1.8659, "step": 83500 }, { "epoch": 1.79, "learning_rate": 2.0224802739318148e-05, "loss": 1.8837, "step": 84000 }, { "epoch": 1.8, "learning_rate": 2.004756942229028e-05, "loss": 1.8608, "step": 84500 }, { "epoch": 1.81, "learning_rate": 1.9870336105262414e-05, "loss": 1.8754, "step": 85000 }, { "epoch": 1.82, "learning_rate": 1.9693102788234544e-05, "loss": 1.8703, "step": 85500 }, { "epoch": 1.83, "learning_rate": 1.9515869471206677e-05, "loss": 1.8642, "step": 86000 }, { "epoch": 1.84, "learning_rate": 1.9338636154178807e-05, "loss": 1.8701, "step": 86500 }, { "epoch": 1.85, "learning_rate": 1.916140283715094e-05, "loss": 1.863, "step": 87000 }, { "epoch": 1.86, "learning_rate": 1.898416952012307e-05, "loss": 1.8672, "step": 87500 }, { "epoch": 1.87, "learning_rate": 1.8806936203095204e-05, "loss": 1.8638, "step": 88000 }, { "epoch": 1.88, "learning_rate": 1.8629702886067333e-05, "loss": 1.8709, "step": 88500 }, { "epoch": 1.89, "learning_rate": 1.845246956903947e-05, "loss": 1.8496, "step": 89000 }, { "epoch": 1.9, "learning_rate": 1.82752362520116e-05, "loss": 1.8601, "step": 89500 }, { "epoch": 1.91, "learning_rate": 1.8098002934983733e-05, "loss": 1.8628, "step": 90000 }, { "epoch": 1.92, "learning_rate": 1.7920769617955863e-05, "loss": 1.8551, "step": 90500 }, { "epoch": 1.94, "learning_rate": 1.7743536300927993e-05, "loss": 1.8493, "step": 91000 }, { "epoch": 1.95, "learning_rate": 1.7566302983900126e-05, "loss": 1.866, "step": 91500 }, { "epoch": 1.96, "learning_rate": 1.7389069666872256e-05, "loss": 1.8527, "step": 92000 }, { "epoch": 1.97, "learning_rate": 1.721183634984439e-05, "loss": 1.8541, "step": 92500 }, { "epoch": 1.98, "learning_rate": 1.7034603032816522e-05, "loss": 1.8483, "step": 93000 }, { "epoch": 1.99, "learning_rate": 1.6857369715788656e-05, "loss": 1.8555, "step": 93500 }, { "epoch": 2.0, "learning_rate": 1.6680136398760785e-05, "loss": 1.8524, "step": 94000 }, { "epoch": 2.01, "learning_rate": 1.650290308173292e-05, "loss": 1.8502, "step": 94500 }, { "epoch": 2.02, "learning_rate": 1.632566976470505e-05, "loss": 1.8332, "step": 95000 }, { "epoch": 2.03, "learning_rate": 1.614843644767718e-05, "loss": 1.8336, "step": 95500 }, { "epoch": 2.04, "learning_rate": 1.597120313064931e-05, "loss": 1.8362, "step": 96000 }, { "epoch": 2.05, "learning_rate": 1.5793969813621445e-05, "loss": 1.845, "step": 96500 }, { "epoch": 2.06, "learning_rate": 1.5616736496593575e-05, "loss": 1.8313, "step": 97000 }, { "epoch": 2.07, "learning_rate": 1.5439503179565708e-05, "loss": 1.8447, "step": 97500 }, { "epoch": 2.08, "learning_rate": 1.526226986253784e-05, "loss": 1.8309, "step": 98000 }, { "epoch": 2.09, "learning_rate": 1.5085036545509973e-05, "loss": 1.8321, "step": 98500 }, { "epoch": 2.11, "learning_rate": 1.4907803228482104e-05, "loss": 1.8326, "step": 99000 }, { "epoch": 2.12, "learning_rate": 1.4730569911454237e-05, "loss": 1.8386, "step": 99500 }, { "epoch": 2.13, "learning_rate": 1.4553336594426367e-05, "loss": 1.8316, "step": 100000 }, { "epoch": 2.14, "learning_rate": 1.43761032773985e-05, "loss": 1.8337, "step": 100500 }, { "epoch": 2.15, "learning_rate": 1.419886996037063e-05, "loss": 1.8395, "step": 101000 }, { "epoch": 2.16, "learning_rate": 1.4021636643342762e-05, "loss": 1.8345, "step": 101500 }, { "epoch": 2.17, "learning_rate": 1.3844403326314895e-05, "loss": 1.8324, "step": 102000 }, { "epoch": 2.18, "learning_rate": 1.3667170009287025e-05, "loss": 1.8222, "step": 102500 }, { "epoch": 2.19, "learning_rate": 1.3489936692259158e-05, "loss": 1.8077, "step": 103000 }, { "epoch": 2.2, "learning_rate": 1.331270337523129e-05, "loss": 1.8202, "step": 103500 }, { "epoch": 2.21, "learning_rate": 1.3135470058203423e-05, "loss": 1.8052, "step": 104000 }, { "epoch": 2.22, "learning_rate": 1.2958236741175553e-05, "loss": 1.8069, "step": 104500 }, { "epoch": 2.23, "learning_rate": 1.2781003424147686e-05, "loss": 1.8158, "step": 105000 }, { "epoch": 2.24, "learning_rate": 1.2603770107119817e-05, "loss": 1.8183, "step": 105500 }, { "epoch": 2.25, "learning_rate": 1.242653679009195e-05, "loss": 1.8116, "step": 106000 }, { "epoch": 2.27, "learning_rate": 1.224930347306408e-05, "loss": 1.8095, "step": 106500 }, { "epoch": 2.28, "learning_rate": 1.2072070156036212e-05, "loss": 1.8207, "step": 107000 }, { "epoch": 2.29, "learning_rate": 1.1894836839008344e-05, "loss": 1.8184, "step": 107500 }, { "epoch": 2.3, "learning_rate": 1.1717603521980477e-05, "loss": 1.8278, "step": 108000 }, { "epoch": 2.31, "learning_rate": 1.1540370204952608e-05, "loss": 1.8164, "step": 108500 }, { "epoch": 2.32, "learning_rate": 1.136313688792474e-05, "loss": 1.8244, "step": 109000 }, { "epoch": 2.33, "learning_rate": 1.1185903570896871e-05, "loss": 1.8199, "step": 109500 }, { "epoch": 2.34, "learning_rate": 1.1008670253869003e-05, "loss": 1.8252, "step": 110000 }, { "epoch": 2.35, "learning_rate": 1.0831436936841136e-05, "loss": 1.808, "step": 110500 }, { "epoch": 2.36, "learning_rate": 1.0654203619813268e-05, "loss": 1.8097, "step": 111000 }, { "epoch": 2.37, "learning_rate": 1.04769703027854e-05, "loss": 1.8049, "step": 111500 }, { "epoch": 2.38, "learning_rate": 1.029973698575753e-05, "loss": 1.8081, "step": 112000 }, { "epoch": 2.39, "learning_rate": 1.0122503668729662e-05, "loss": 1.8134, "step": 112500 }, { "epoch": 2.4, "learning_rate": 9.945270351701796e-06, "loss": 1.7976, "step": 113000 }, { "epoch": 2.41, "learning_rate": 9.768037034673927e-06, "loss": 1.8036, "step": 113500 }, { "epoch": 2.42, "learning_rate": 9.590803717646059e-06, "loss": 1.7979, "step": 114000 }, { "epoch": 2.44, "learning_rate": 9.41357040061819e-06, "loss": 1.8165, "step": 114500 }, { "epoch": 2.45, "learning_rate": 9.236337083590323e-06, "loss": 1.8069, "step": 115000 }, { "epoch": 2.46, "learning_rate": 9.059103766562455e-06, "loss": 1.7922, "step": 115500 }, { "epoch": 2.47, "learning_rate": 8.881870449534587e-06, "loss": 1.7981, "step": 116000 }, { "epoch": 2.48, "learning_rate": 8.704637132506716e-06, "loss": 1.7998, "step": 116500 }, { "epoch": 2.49, "learning_rate": 8.52740381547885e-06, "loss": 1.7938, "step": 117000 }, { "epoch": 2.5, "learning_rate": 8.350170498450981e-06, "loss": 1.8041, "step": 117500 }, { "epoch": 2.51, "learning_rate": 8.172937181423113e-06, "loss": 1.8037, "step": 118000 }, { "epoch": 2.52, "learning_rate": 7.995703864395244e-06, "loss": 1.7945, "step": 118500 }, { "epoch": 2.53, "learning_rate": 7.818470547367376e-06, "loss": 1.8138, "step": 119000 }, { "epoch": 2.54, "learning_rate": 7.641237230339509e-06, "loss": 1.792, "step": 119500 }, { "epoch": 2.55, "learning_rate": 7.4640039133116405e-06, "loss": 1.7963, "step": 120000 }, { "epoch": 2.56, "learning_rate": 7.286770596283772e-06, "loss": 1.7906, "step": 120500 }, { "epoch": 2.57, "learning_rate": 7.1095372792559036e-06, "loss": 1.7933, "step": 121000 }, { "epoch": 2.58, "learning_rate": 6.932303962228036e-06, "loss": 1.7877, "step": 121500 }, { "epoch": 2.59, "learning_rate": 6.7550706452001675e-06, "loss": 1.7999, "step": 122000 }, { "epoch": 2.61, "learning_rate": 6.5778373281723e-06, "loss": 1.7839, "step": 122500 }, { "epoch": 2.62, "learning_rate": 6.400604011144431e-06, "loss": 1.7907, "step": 123000 }, { "epoch": 2.63, "learning_rate": 6.223370694116564e-06, "loss": 1.7869, "step": 123500 }, { "epoch": 2.64, "learning_rate": 6.0461373770886945e-06, "loss": 1.7736, "step": 124000 }, { "epoch": 2.65, "learning_rate": 5.868904060060827e-06, "loss": 1.7825, "step": 124500 }, { "epoch": 2.66, "learning_rate": 5.691670743032958e-06, "loss": 1.7831, "step": 125000 }, { "epoch": 2.67, "learning_rate": 5.51443742600509e-06, "loss": 1.7831, "step": 125500 }, { "epoch": 2.68, "learning_rate": 5.337204108977222e-06, "loss": 1.7848, "step": 126000 }, { "epoch": 2.69, "learning_rate": 5.159970791949354e-06, "loss": 1.793, "step": 126500 }, { "epoch": 2.7, "learning_rate": 4.982737474921486e-06, "loss": 1.7887, "step": 127000 }, { "epoch": 2.71, "learning_rate": 4.805504157893618e-06, "loss": 1.7824, "step": 127500 }, { "epoch": 2.72, "learning_rate": 4.62827084086575e-06, "loss": 1.7885, "step": 128000 }, { "epoch": 2.73, "learning_rate": 4.451037523837882e-06, "loss": 1.7892, "step": 128500 }, { "epoch": 2.74, "learning_rate": 4.273804206810013e-06, "loss": 1.7832, "step": 129000 }, { "epoch": 2.75, "learning_rate": 4.096570889782145e-06, "loss": 1.7705, "step": 129500 }, { "epoch": 2.76, "learning_rate": 3.919337572754276e-06, "loss": 1.7775, "step": 130000 }, { "epoch": 2.78, "learning_rate": 3.7421042557264087e-06, "loss": 1.7737, "step": 130500 }, { "epoch": 2.79, "learning_rate": 3.5648709386985406e-06, "loss": 1.7858, "step": 131000 }, { "epoch": 2.8, "learning_rate": 3.387637621670672e-06, "loss": 1.784, "step": 131500 }, { "epoch": 2.81, "learning_rate": 3.210404304642804e-06, "loss": 1.7766, "step": 132000 }, { "epoch": 2.82, "learning_rate": 3.033170987614936e-06, "loss": 1.7806, "step": 132500 }, { "epoch": 2.83, "learning_rate": 2.8559376705870676e-06, "loss": 1.7574, "step": 133000 }, { "epoch": 2.84, "learning_rate": 2.6787043535591996e-06, "loss": 1.7697, "step": 133500 }, { "epoch": 2.85, "learning_rate": 2.5014710365313316e-06, "loss": 1.7834, "step": 134000 }, { "epoch": 2.86, "learning_rate": 2.3242377195034635e-06, "loss": 1.7644, "step": 134500 }, { "epoch": 2.87, "learning_rate": 2.147004402475595e-06, "loss": 1.7794, "step": 135000 }, { "epoch": 2.88, "learning_rate": 1.9697710854477266e-06, "loss": 1.7657, "step": 135500 }, { "epoch": 2.89, "learning_rate": 1.7925377684198588e-06, "loss": 1.7786, "step": 136000 }, { "epoch": 2.9, "learning_rate": 1.6153044513919905e-06, "loss": 1.7756, "step": 136500 }, { "epoch": 2.91, "learning_rate": 1.4380711343641223e-06, "loss": 1.7742, "step": 137000 }, { "epoch": 2.92, "learning_rate": 1.2608378173362542e-06, "loss": 1.7711, "step": 137500 }, { "epoch": 2.93, "learning_rate": 1.0836045003083862e-06, "loss": 1.7596, "step": 138000 }, { "epoch": 2.95, "learning_rate": 9.063711832805178e-07, "loss": 1.7777, "step": 138500 }, { "epoch": 2.96, "learning_rate": 7.291378662526497e-07, "loss": 1.7767, "step": 139000 }, { "epoch": 2.97, "learning_rate": 5.519045492247815e-07, "loss": 1.7738, "step": 139500 }, { "epoch": 2.98, "learning_rate": 3.746712321969133e-07, "loss": 1.787, "step": 140000 }, { "epoch": 2.99, "learning_rate": 1.9743791516904515e-07, "loss": 1.7725, "step": 140500 }, { "epoch": 3.0, "learning_rate": 2.0204598141176972e-08, "loss": 1.7788, "step": 141000 }, { "epoch": 3.0, "step": 141057, "total_flos": 3.722159267218719e+17, "train_loss": 1.9451130962921488, "train_runtime": 85182.9847, "train_samples_per_second": 16.559, "train_steps_per_second": 1.656 } ], "max_steps": 141057, "num_train_epochs": 3, "total_flos": 3.722159267218719e+17, "trial_name": null, "trial_params": null }