{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.944640753828033, "global_step": 160000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.995398998822144e-05, "loss": 0.2082, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.990797997644288e-05, "loss": 0.1548, "step": 1000 }, { "epoch": 0.03, "learning_rate": 4.9861969964664316e-05, "loss": 0.1389, "step": 1500 }, { "epoch": 0.04, "learning_rate": 4.9815959952885747e-05, "loss": 0.1344, "step": 2000 }, { "epoch": 0.05, "learning_rate": 4.9769949941107184e-05, "loss": 0.1306, "step": 2500 }, { "epoch": 0.06, "learning_rate": 4.972393992932862e-05, "loss": 0.1204, "step": 3000 }, { "epoch": 0.06, "learning_rate": 4.967792991755006e-05, "loss": 0.1265, "step": 3500 }, { "epoch": 0.07, "learning_rate": 4.96319199057715e-05, "loss": 0.1253, "step": 4000 }, { "epoch": 0.08, "learning_rate": 4.9585909893992935e-05, "loss": 0.1151, "step": 4500 }, { "epoch": 0.09, "learning_rate": 4.953989988221437e-05, "loss": 0.1133, "step": 5000 }, { "epoch": 0.1, "learning_rate": 4.949388987043581e-05, "loss": 0.1193, "step": 5500 }, { "epoch": 0.11, "learning_rate": 4.944787985865725e-05, "loss": 0.1181, "step": 6000 }, { "epoch": 0.12, "learning_rate": 4.9401869846878686e-05, "loss": 0.1124, "step": 6500 }, { "epoch": 0.13, "learning_rate": 4.9355859835100124e-05, "loss": 0.1117, "step": 7000 }, { "epoch": 0.14, "learning_rate": 4.9309849823321555e-05, "loss": 0.1117, "step": 7500 }, { "epoch": 0.15, "learning_rate": 4.926383981154299e-05, "loss": 0.1064, "step": 8000 }, { "epoch": 0.16, "learning_rate": 4.921782979976443e-05, "loss": 0.1073, "step": 8500 }, { "epoch": 0.17, "learning_rate": 4.917181978798587e-05, "loss": 0.1064, "step": 9000 }, { "epoch": 0.17, "learning_rate": 4.9125809776207306e-05, "loss": 0.103, "step": 9500 }, { "epoch": 0.18, "learning_rate": 4.9079799764428744e-05, "loss": 0.1039, "step": 10000 }, { "epoch": 0.19, "learning_rate": 4.9033789752650175e-05, "loss": 0.0993, "step": 10500 }, { "epoch": 0.2, "learning_rate": 4.898777974087161e-05, "loss": 0.0978, "step": 11000 }, { "epoch": 0.21, "learning_rate": 4.894176972909305e-05, "loss": 0.1037, "step": 11500 }, { "epoch": 0.22, "learning_rate": 4.889575971731449e-05, "loss": 0.1021, "step": 12000 }, { "epoch": 0.23, "learning_rate": 4.8849749705535926e-05, "loss": 0.104, "step": 12500 }, { "epoch": 0.24, "learning_rate": 4.880373969375736e-05, "loss": 0.1, "step": 13000 }, { "epoch": 0.25, "learning_rate": 4.87577296819788e-05, "loss": 0.1069, "step": 13500 }, { "epoch": 0.26, "learning_rate": 4.871171967020024e-05, "loss": 0.1063, "step": 14000 }, { "epoch": 0.27, "learning_rate": 4.8665709658421677e-05, "loss": 0.1061, "step": 14500 }, { "epoch": 0.28, "learning_rate": 4.8619699646643114e-05, "loss": 0.1037, "step": 15000 }, { "epoch": 0.29, "learning_rate": 4.857368963486455e-05, "loss": 0.0995, "step": 15500 }, { "epoch": 0.29, "learning_rate": 4.852767962308599e-05, "loss": 0.0999, "step": 16000 }, { "epoch": 0.3, "learning_rate": 4.848166961130742e-05, "loss": 0.0985, "step": 16500 }, { "epoch": 0.31, "learning_rate": 4.843565959952886e-05, "loss": 0.0925, "step": 17000 }, { "epoch": 0.32, "learning_rate": 4.8389649587750296e-05, "loss": 0.0996, "step": 17500 }, { "epoch": 0.33, "learning_rate": 4.8343639575971734e-05, "loss": 0.1022, "step": 18000 }, { "epoch": 0.34, "learning_rate": 4.829762956419317e-05, "loss": 0.0979, "step": 18500 }, { "epoch": 0.35, "learning_rate": 4.825161955241461e-05, "loss": 0.0969, "step": 19000 }, { "epoch": 0.36, "learning_rate": 4.820560954063604e-05, "loss": 0.109, "step": 19500 }, { "epoch": 0.37, "learning_rate": 4.815959952885748e-05, "loss": 0.1, "step": 20000 }, { "epoch": 0.38, "learning_rate": 4.8113589517078916e-05, "loss": 0.0998, "step": 20500 }, { "epoch": 0.39, "learning_rate": 4.8067579505300354e-05, "loss": 0.0954, "step": 21000 }, { "epoch": 0.4, "learning_rate": 4.80215694935218e-05, "loss": 0.1004, "step": 21500 }, { "epoch": 0.4, "learning_rate": 4.797555948174323e-05, "loss": 0.097, "step": 22000 }, { "epoch": 0.41, "learning_rate": 4.792954946996467e-05, "loss": 0.0972, "step": 22500 }, { "epoch": 0.42, "learning_rate": 4.7883539458186105e-05, "loss": 0.0958, "step": 23000 }, { "epoch": 0.43, "learning_rate": 4.783752944640754e-05, "loss": 0.0981, "step": 23500 }, { "epoch": 0.44, "learning_rate": 4.779151943462898e-05, "loss": 0.0962, "step": 24000 }, { "epoch": 0.45, "learning_rate": 4.774550942285042e-05, "loss": 0.0994, "step": 24500 }, { "epoch": 0.46, "learning_rate": 4.769949941107185e-05, "loss": 0.0958, "step": 25000 }, { "epoch": 0.47, "learning_rate": 4.7653489399293287e-05, "loss": 0.0921, "step": 25500 }, { "epoch": 0.48, "learning_rate": 4.7607479387514724e-05, "loss": 0.0985, "step": 26000 }, { "epoch": 0.49, "learning_rate": 4.756146937573616e-05, "loss": 0.0957, "step": 26500 }, { "epoch": 0.5, "learning_rate": 4.75154593639576e-05, "loss": 0.0962, "step": 27000 }, { "epoch": 0.51, "learning_rate": 4.746944935217904e-05, "loss": 0.1003, "step": 27500 }, { "epoch": 0.52, "learning_rate": 4.742343934040047e-05, "loss": 0.1016, "step": 28000 }, { "epoch": 0.52, "learning_rate": 4.7377429328621906e-05, "loss": 0.0943, "step": 28500 }, { "epoch": 0.53, "learning_rate": 4.7331419316843344e-05, "loss": 0.0997, "step": 29000 }, { "epoch": 0.54, "learning_rate": 4.728540930506478e-05, "loss": 0.0924, "step": 29500 }, { "epoch": 0.55, "learning_rate": 4.7239399293286226e-05, "loss": 0.0959, "step": 30000 }, { "epoch": 0.56, "learning_rate": 4.719338928150766e-05, "loss": 0.0915, "step": 30500 }, { "epoch": 0.57, "learning_rate": 4.7147379269729095e-05, "loss": 0.0932, "step": 31000 }, { "epoch": 0.58, "learning_rate": 4.710136925795053e-05, "loss": 0.0964, "step": 31500 }, { "epoch": 0.59, "learning_rate": 4.705535924617197e-05, "loss": 0.1011, "step": 32000 }, { "epoch": 0.6, "learning_rate": 4.700934923439341e-05, "loss": 0.091, "step": 32500 }, { "epoch": 0.61, "learning_rate": 4.6963339222614846e-05, "loss": 0.0986, "step": 33000 }, { "epoch": 0.62, "learning_rate": 4.6917329210836284e-05, "loss": 0.0911, "step": 33500 }, { "epoch": 0.63, "learning_rate": 4.6871319199057715e-05, "loss": 0.0857, "step": 34000 }, { "epoch": 0.63, "learning_rate": 4.682530918727915e-05, "loss": 0.0983, "step": 34500 }, { "epoch": 0.64, "learning_rate": 4.677929917550059e-05, "loss": 0.0974, "step": 35000 }, { "epoch": 0.65, "learning_rate": 4.673328916372203e-05, "loss": 0.0911, "step": 35500 }, { "epoch": 0.66, "learning_rate": 4.6687279151943466e-05, "loss": 0.0945, "step": 36000 }, { "epoch": 0.67, "learning_rate": 4.66412691401649e-05, "loss": 0.0932, "step": 36500 }, { "epoch": 0.68, "learning_rate": 4.6595259128386334e-05, "loss": 0.0998, "step": 37000 }, { "epoch": 0.69, "learning_rate": 4.654924911660777e-05, "loss": 0.0896, "step": 37500 }, { "epoch": 0.7, "learning_rate": 4.650323910482921e-05, "loss": 0.0893, "step": 38000 }, { "epoch": 0.71, "learning_rate": 4.6457229093050654e-05, "loss": 0.0984, "step": 38500 }, { "epoch": 0.72, "learning_rate": 4.641121908127209e-05, "loss": 0.0867, "step": 39000 }, { "epoch": 0.73, "learning_rate": 4.636520906949352e-05, "loss": 0.0944, "step": 39500 }, { "epoch": 0.74, "learning_rate": 4.631919905771496e-05, "loss": 0.0918, "step": 40000 }, { "epoch": 0.75, "learning_rate": 4.62731890459364e-05, "loss": 0.0884, "step": 40500 }, { "epoch": 0.75, "learning_rate": 4.6227179034157836e-05, "loss": 0.0927, "step": 41000 }, { "epoch": 0.76, "learning_rate": 4.6181169022379274e-05, "loss": 0.0927, "step": 41500 }, { "epoch": 0.77, "learning_rate": 4.613515901060071e-05, "loss": 0.0978, "step": 42000 }, { "epoch": 0.78, "learning_rate": 4.608914899882214e-05, "loss": 0.0944, "step": 42500 }, { "epoch": 0.79, "learning_rate": 4.604313898704358e-05, "loss": 0.0918, "step": 43000 }, { "epoch": 0.8, "learning_rate": 4.599712897526502e-05, "loss": 0.0886, "step": 43500 }, { "epoch": 0.81, "learning_rate": 4.5951118963486456e-05, "loss": 0.0897, "step": 44000 }, { "epoch": 0.82, "learning_rate": 4.5905108951707894e-05, "loss": 0.0894, "step": 44500 }, { "epoch": 0.83, "learning_rate": 4.585909893992933e-05, "loss": 0.09, "step": 45000 }, { "epoch": 0.84, "learning_rate": 4.581308892815077e-05, "loss": 0.0934, "step": 45500 }, { "epoch": 0.85, "learning_rate": 4.57670789163722e-05, "loss": 0.0874, "step": 46000 }, { "epoch": 0.86, "learning_rate": 4.572106890459364e-05, "loss": 0.0929, "step": 46500 }, { "epoch": 0.86, "learning_rate": 4.567505889281508e-05, "loss": 0.0883, "step": 47000 }, { "epoch": 0.87, "learning_rate": 4.562904888103652e-05, "loss": 0.0929, "step": 47500 }, { "epoch": 0.88, "learning_rate": 4.558303886925796e-05, "loss": 0.0835, "step": 48000 }, { "epoch": 0.89, "learning_rate": 4.553702885747939e-05, "loss": 0.0925, "step": 48500 }, { "epoch": 0.9, "learning_rate": 4.5491018845700827e-05, "loss": 0.0934, "step": 49000 }, { "epoch": 0.91, "learning_rate": 4.5445008833922264e-05, "loss": 0.095, "step": 49500 }, { "epoch": 0.92, "learning_rate": 4.53989988221437e-05, "loss": 0.0861, "step": 50000 }, { "epoch": 0.93, "learning_rate": 4.535298881036514e-05, "loss": 0.089, "step": 50500 }, { "epoch": 0.94, "learning_rate": 4.530697879858658e-05, "loss": 0.0878, "step": 51000 }, { "epoch": 0.95, "learning_rate": 4.526096878680801e-05, "loss": 0.094, "step": 51500 }, { "epoch": 0.96, "learning_rate": 4.5214958775029446e-05, "loss": 0.0888, "step": 52000 }, { "epoch": 0.97, "learning_rate": 4.5168948763250884e-05, "loss": 0.0802, "step": 52500 }, { "epoch": 0.98, "learning_rate": 4.512293875147232e-05, "loss": 0.0945, "step": 53000 }, { "epoch": 0.98, "learning_rate": 4.507692873969376e-05, "loss": 0.0909, "step": 53500 }, { "epoch": 0.99, "learning_rate": 4.50309187279152e-05, "loss": 0.0863, "step": 54000 }, { "epoch": 1.0, "eval_accuracy": 0.9518667922735654, "eval_loss": 0.08465953916311264, "eval_runtime": 917.0055, "eval_samples_per_second": 270.875, "eval_steps_per_second": 16.93, "step": 54336 }, { "epoch": 1.0, "learning_rate": 4.498490871613663e-05, "loss": 0.0784, "step": 54500 }, { "epoch": 1.01, "learning_rate": 4.4938898704358066e-05, "loss": 0.0789, "step": 55000 }, { "epoch": 1.02, "learning_rate": 4.489288869257951e-05, "loss": 0.0799, "step": 55500 }, { "epoch": 1.03, "learning_rate": 4.484687868080095e-05, "loss": 0.0813, "step": 56000 }, { "epoch": 1.04, "learning_rate": 4.4800868669022386e-05, "loss": 0.0784, "step": 56500 }, { "epoch": 1.05, "learning_rate": 4.475485865724382e-05, "loss": 0.081, "step": 57000 }, { "epoch": 1.06, "learning_rate": 4.4708848645465255e-05, "loss": 0.0781, "step": 57500 }, { "epoch": 1.07, "learning_rate": 4.466283863368669e-05, "loss": 0.0764, "step": 58000 }, { "epoch": 1.08, "learning_rate": 4.461682862190813e-05, "loss": 0.0765, "step": 58500 }, { "epoch": 1.09, "learning_rate": 4.457081861012957e-05, "loss": 0.0798, "step": 59000 }, { "epoch": 1.1, "learning_rate": 4.4524808598351006e-05, "loss": 0.0805, "step": 59500 }, { "epoch": 1.1, "learning_rate": 4.4478798586572437e-05, "loss": 0.0795, "step": 60000 }, { "epoch": 1.11, "learning_rate": 4.4432788574793874e-05, "loss": 0.0802, "step": 60500 }, { "epoch": 1.12, "learning_rate": 4.438677856301531e-05, "loss": 0.0761, "step": 61000 }, { "epoch": 1.13, "learning_rate": 4.434076855123675e-05, "loss": 0.0847, "step": 61500 }, { "epoch": 1.14, "learning_rate": 4.429475853945819e-05, "loss": 0.0816, "step": 62000 }, { "epoch": 1.15, "learning_rate": 4.4248748527679625e-05, "loss": 0.0886, "step": 62500 }, { "epoch": 1.16, "learning_rate": 4.420273851590106e-05, "loss": 0.0796, "step": 63000 }, { "epoch": 1.17, "learning_rate": 4.4156728504122494e-05, "loss": 0.0809, "step": 63500 }, { "epoch": 1.18, "learning_rate": 4.411071849234394e-05, "loss": 0.0852, "step": 64000 }, { "epoch": 1.19, "learning_rate": 4.4064708480565376e-05, "loss": 0.0798, "step": 64500 }, { "epoch": 1.2, "learning_rate": 4.4018698468786814e-05, "loss": 0.0792, "step": 65000 }, { "epoch": 1.21, "learning_rate": 4.397268845700825e-05, "loss": 0.0814, "step": 65500 }, { "epoch": 1.21, "learning_rate": 4.392667844522968e-05, "loss": 0.0865, "step": 66000 }, { "epoch": 1.22, "learning_rate": 4.388066843345112e-05, "loss": 0.0796, "step": 66500 }, { "epoch": 1.23, "learning_rate": 4.383465842167256e-05, "loss": 0.0813, "step": 67000 }, { "epoch": 1.24, "learning_rate": 4.3788648409893996e-05, "loss": 0.0778, "step": 67500 }, { "epoch": 1.25, "learning_rate": 4.3742638398115434e-05, "loss": 0.0754, "step": 68000 }, { "epoch": 1.26, "learning_rate": 4.369662838633687e-05, "loss": 0.0824, "step": 68500 }, { "epoch": 1.27, "learning_rate": 4.36506183745583e-05, "loss": 0.0804, "step": 69000 }, { "epoch": 1.28, "learning_rate": 4.360460836277974e-05, "loss": 0.082, "step": 69500 }, { "epoch": 1.29, "learning_rate": 4.355859835100118e-05, "loss": 0.0823, "step": 70000 }, { "epoch": 1.3, "learning_rate": 4.3512588339222616e-05, "loss": 0.0795, "step": 70500 }, { "epoch": 1.31, "learning_rate": 4.346657832744405e-05, "loss": 0.0811, "step": 71000 }, { "epoch": 1.32, "learning_rate": 4.342056831566549e-05, "loss": 0.0838, "step": 71500 }, { "epoch": 1.33, "learning_rate": 4.337455830388692e-05, "loss": 0.0827, "step": 72000 }, { "epoch": 1.33, "learning_rate": 4.3328548292108367e-05, "loss": 0.0824, "step": 72500 }, { "epoch": 1.34, "learning_rate": 4.3282538280329804e-05, "loss": 0.0816, "step": 73000 }, { "epoch": 1.35, "learning_rate": 4.323652826855124e-05, "loss": 0.0796, "step": 73500 }, { "epoch": 1.36, "learning_rate": 4.319051825677268e-05, "loss": 0.0784, "step": 74000 }, { "epoch": 1.37, "learning_rate": 4.314450824499411e-05, "loss": 0.076, "step": 74500 }, { "epoch": 1.38, "learning_rate": 4.309849823321555e-05, "loss": 0.079, "step": 75000 }, { "epoch": 1.39, "learning_rate": 4.3052488221436986e-05, "loss": 0.0787, "step": 75500 }, { "epoch": 1.4, "learning_rate": 4.3006478209658424e-05, "loss": 0.0759, "step": 76000 }, { "epoch": 1.41, "learning_rate": 4.296046819787986e-05, "loss": 0.0797, "step": 76500 }, { "epoch": 1.42, "learning_rate": 4.29144581861013e-05, "loss": 0.0802, "step": 77000 }, { "epoch": 1.43, "learning_rate": 4.286844817432274e-05, "loss": 0.0839, "step": 77500 }, { "epoch": 1.44, "learning_rate": 4.282243816254417e-05, "loss": 0.082, "step": 78000 }, { "epoch": 1.44, "learning_rate": 4.2776428150765606e-05, "loss": 0.0759, "step": 78500 }, { "epoch": 1.45, "learning_rate": 4.2730418138987044e-05, "loss": 0.0814, "step": 79000 }, { "epoch": 1.46, "learning_rate": 4.268440812720848e-05, "loss": 0.0832, "step": 79500 }, { "epoch": 1.47, "learning_rate": 4.263839811542992e-05, "loss": 0.0779, "step": 80000 }, { "epoch": 1.48, "learning_rate": 4.259238810365136e-05, "loss": 0.0796, "step": 80500 }, { "epoch": 1.49, "learning_rate": 4.2546378091872795e-05, "loss": 0.084, "step": 81000 }, { "epoch": 1.5, "learning_rate": 4.250036808009423e-05, "loss": 0.08, "step": 81500 }, { "epoch": 1.51, "learning_rate": 4.245435806831567e-05, "loss": 0.0796, "step": 82000 }, { "epoch": 1.52, "learning_rate": 4.240834805653711e-05, "loss": 0.0784, "step": 82500 }, { "epoch": 1.53, "learning_rate": 4.2362338044758546e-05, "loss": 0.0783, "step": 83000 }, { "epoch": 1.54, "learning_rate": 4.2316328032979977e-05, "loss": 0.0859, "step": 83500 }, { "epoch": 1.55, "learning_rate": 4.2270318021201414e-05, "loss": 0.0802, "step": 84000 }, { "epoch": 1.56, "learning_rate": 4.222430800942285e-05, "loss": 0.0773, "step": 84500 }, { "epoch": 1.56, "learning_rate": 4.217829799764429e-05, "loss": 0.0831, "step": 85000 }, { "epoch": 1.57, "learning_rate": 4.213228798586573e-05, "loss": 0.0803, "step": 85500 }, { "epoch": 1.58, "learning_rate": 4.2086277974087165e-05, "loss": 0.0826, "step": 86000 }, { "epoch": 1.59, "learning_rate": 4.2040267962308596e-05, "loss": 0.0773, "step": 86500 }, { "epoch": 1.6, "learning_rate": 4.1994257950530034e-05, "loss": 0.0765, "step": 87000 }, { "epoch": 1.61, "learning_rate": 4.194824793875147e-05, "loss": 0.0801, "step": 87500 }, { "epoch": 1.62, "learning_rate": 4.190223792697291e-05, "loss": 0.0806, "step": 88000 }, { "epoch": 1.63, "learning_rate": 4.185622791519435e-05, "loss": 0.0814, "step": 88500 }, { "epoch": 1.64, "learning_rate": 4.1810217903415785e-05, "loss": 0.0734, "step": 89000 }, { "epoch": 1.65, "learning_rate": 4.176420789163722e-05, "loss": 0.0784, "step": 89500 }, { "epoch": 1.66, "learning_rate": 4.171819787985866e-05, "loss": 0.0776, "step": 90000 }, { "epoch": 1.67, "learning_rate": 4.16721878680801e-05, "loss": 0.0713, "step": 90500 }, { "epoch": 1.67, "learning_rate": 4.1626177856301536e-05, "loss": 0.0854, "step": 91000 }, { "epoch": 1.68, "learning_rate": 4.1580167844522974e-05, "loss": 0.0827, "step": 91500 }, { "epoch": 1.69, "learning_rate": 4.153415783274441e-05, "loss": 0.0778, "step": 92000 }, { "epoch": 1.7, "learning_rate": 4.148814782096584e-05, "loss": 0.0778, "step": 92500 }, { "epoch": 1.71, "learning_rate": 4.144213780918728e-05, "loss": 0.082, "step": 93000 }, { "epoch": 1.72, "learning_rate": 4.139612779740872e-05, "loss": 0.0789, "step": 93500 }, { "epoch": 1.73, "learning_rate": 4.1350117785630156e-05, "loss": 0.0823, "step": 94000 }, { "epoch": 1.74, "learning_rate": 4.130410777385159e-05, "loss": 0.0812, "step": 94500 }, { "epoch": 1.75, "learning_rate": 4.125809776207303e-05, "loss": 0.0852, "step": 95000 }, { "epoch": 1.76, "learning_rate": 4.121208775029446e-05, "loss": 0.0767, "step": 95500 }, { "epoch": 1.77, "learning_rate": 4.11660777385159e-05, "loss": 0.0775, "step": 96000 }, { "epoch": 1.78, "learning_rate": 4.112006772673734e-05, "loss": 0.0823, "step": 96500 }, { "epoch": 1.79, "learning_rate": 4.1074057714958775e-05, "loss": 0.0761, "step": 97000 }, { "epoch": 1.79, "learning_rate": 4.102804770318021e-05, "loss": 0.0715, "step": 97500 }, { "epoch": 1.8, "learning_rate": 4.098203769140165e-05, "loss": 0.0725, "step": 98000 }, { "epoch": 1.81, "learning_rate": 4.093602767962309e-05, "loss": 0.0818, "step": 98500 }, { "epoch": 1.82, "learning_rate": 4.0890017667844526e-05, "loss": 0.0827, "step": 99000 }, { "epoch": 1.83, "learning_rate": 4.0844007656065964e-05, "loss": 0.0795, "step": 99500 }, { "epoch": 1.84, "learning_rate": 4.07979976442874e-05, "loss": 0.0823, "step": 100000 }, { "epoch": 1.85, "learning_rate": 4.075198763250884e-05, "loss": 0.0778, "step": 100500 }, { "epoch": 1.86, "learning_rate": 4.070597762073027e-05, "loss": 0.0806, "step": 101000 }, { "epoch": 1.87, "learning_rate": 4.065996760895171e-05, "loss": 0.0857, "step": 101500 }, { "epoch": 1.88, "learning_rate": 4.0613957597173146e-05, "loss": 0.082, "step": 102000 }, { "epoch": 1.89, "learning_rate": 4.0567947585394584e-05, "loss": 0.0777, "step": 102500 }, { "epoch": 1.9, "learning_rate": 4.052193757361602e-05, "loss": 0.0705, "step": 103000 }, { "epoch": 1.9, "learning_rate": 4.047592756183746e-05, "loss": 0.0795, "step": 103500 }, { "epoch": 1.91, "learning_rate": 4.042991755005889e-05, "loss": 0.0761, "step": 104000 }, { "epoch": 1.92, "learning_rate": 4.038390753828033e-05, "loss": 0.0784, "step": 104500 }, { "epoch": 1.93, "learning_rate": 4.0337897526501766e-05, "loss": 0.0804, "step": 105000 }, { "epoch": 1.94, "learning_rate": 4.02918875147232e-05, "loss": 0.0789, "step": 105500 }, { "epoch": 1.95, "learning_rate": 4.024587750294464e-05, "loss": 0.0699, "step": 106000 }, { "epoch": 1.96, "learning_rate": 4.0199867491166086e-05, "loss": 0.0748, "step": 106500 }, { "epoch": 1.97, "learning_rate": 4.0153857479387517e-05, "loss": 0.0801, "step": 107000 }, { "epoch": 1.98, "learning_rate": 4.0107847467608954e-05, "loss": 0.0795, "step": 107500 }, { "epoch": 1.99, "learning_rate": 4.006183745583039e-05, "loss": 0.0834, "step": 108000 }, { "epoch": 2.0, "learning_rate": 4.001582744405183e-05, "loss": 0.0786, "step": 108500 }, { "epoch": 2.0, "eval_accuracy": 0.9535093440260232, "eval_loss": 0.08472639322280884, "eval_runtime": 916.4191, "eval_samples_per_second": 271.048, "eval_steps_per_second": 16.941, "step": 108672 }, { "epoch": 2.01, "learning_rate": 3.996981743227327e-05, "loss": 0.0723, "step": 109000 }, { "epoch": 2.02, "learning_rate": 3.9923807420494705e-05, "loss": 0.0672, "step": 109500 }, { "epoch": 2.02, "learning_rate": 3.9877797408716136e-05, "loss": 0.0694, "step": 110000 }, { "epoch": 2.03, "learning_rate": 3.9831787396937574e-05, "loss": 0.0671, "step": 110500 }, { "epoch": 2.04, "learning_rate": 3.978577738515901e-05, "loss": 0.0714, "step": 111000 }, { "epoch": 2.05, "learning_rate": 3.973976737338045e-05, "loss": 0.0668, "step": 111500 }, { "epoch": 2.06, "learning_rate": 3.969375736160189e-05, "loss": 0.0641, "step": 112000 }, { "epoch": 2.07, "learning_rate": 3.9647747349823325e-05, "loss": 0.0654, "step": 112500 }, { "epoch": 2.08, "learning_rate": 3.9601737338044756e-05, "loss": 0.0737, "step": 113000 }, { "epoch": 2.09, "learning_rate": 3.9555727326266194e-05, "loss": 0.0629, "step": 113500 }, { "epoch": 2.1, "learning_rate": 3.950971731448763e-05, "loss": 0.0648, "step": 114000 }, { "epoch": 2.11, "learning_rate": 3.946370730270907e-05, "loss": 0.0658, "step": 114500 }, { "epoch": 2.12, "learning_rate": 3.9417697290930514e-05, "loss": 0.0691, "step": 115000 }, { "epoch": 2.13, "learning_rate": 3.9371687279151945e-05, "loss": 0.0681, "step": 115500 }, { "epoch": 2.13, "learning_rate": 3.932567726737338e-05, "loss": 0.0689, "step": 116000 }, { "epoch": 2.14, "learning_rate": 3.927966725559482e-05, "loss": 0.0683, "step": 116500 }, { "epoch": 2.15, "learning_rate": 3.923365724381626e-05, "loss": 0.0701, "step": 117000 }, { "epoch": 2.16, "learning_rate": 3.9187647232037696e-05, "loss": 0.0669, "step": 117500 }, { "epoch": 2.17, "learning_rate": 3.914163722025913e-05, "loss": 0.0716, "step": 118000 }, { "epoch": 2.18, "learning_rate": 3.9095627208480564e-05, "loss": 0.0685, "step": 118500 }, { "epoch": 2.19, "learning_rate": 3.9049617196702e-05, "loss": 0.0669, "step": 119000 }, { "epoch": 2.2, "learning_rate": 3.900360718492344e-05, "loss": 0.0786, "step": 119500 }, { "epoch": 2.21, "learning_rate": 3.895759717314488e-05, "loss": 0.0675, "step": 120000 }, { "epoch": 2.22, "learning_rate": 3.8911587161366315e-05, "loss": 0.0692, "step": 120500 }, { "epoch": 2.23, "learning_rate": 3.886557714958775e-05, "loss": 0.0668, "step": 121000 }, { "epoch": 2.24, "learning_rate": 3.881956713780919e-05, "loss": 0.0715, "step": 121500 }, { "epoch": 2.25, "learning_rate": 3.877355712603062e-05, "loss": 0.0696, "step": 122000 }, { "epoch": 2.25, "learning_rate": 3.872754711425206e-05, "loss": 0.0727, "step": 122500 }, { "epoch": 2.26, "learning_rate": 3.86815371024735e-05, "loss": 0.0739, "step": 123000 }, { "epoch": 2.27, "learning_rate": 3.863552709069494e-05, "loss": 0.0696, "step": 123500 }, { "epoch": 2.28, "learning_rate": 3.858951707891638e-05, "loss": 0.0735, "step": 124000 }, { "epoch": 2.29, "learning_rate": 3.854350706713781e-05, "loss": 0.0699, "step": 124500 }, { "epoch": 2.3, "learning_rate": 3.849749705535925e-05, "loss": 0.0666, "step": 125000 }, { "epoch": 2.31, "learning_rate": 3.8451487043580686e-05, "loss": 0.0752, "step": 125500 }, { "epoch": 2.32, "learning_rate": 3.8405477031802124e-05, "loss": 0.0676, "step": 126000 }, { "epoch": 2.33, "learning_rate": 3.835946702002356e-05, "loss": 0.0672, "step": 126500 }, { "epoch": 2.34, "learning_rate": 3.8313457008245e-05, "loss": 0.0681, "step": 127000 }, { "epoch": 2.35, "learning_rate": 3.826744699646643e-05, "loss": 0.0756, "step": 127500 }, { "epoch": 2.36, "learning_rate": 3.822143698468787e-05, "loss": 0.0782, "step": 128000 }, { "epoch": 2.36, "learning_rate": 3.8175426972909306e-05, "loss": 0.072, "step": 128500 }, { "epoch": 2.37, "learning_rate": 3.812941696113074e-05, "loss": 0.0694, "step": 129000 }, { "epoch": 2.38, "learning_rate": 3.808340694935218e-05, "loss": 0.0686, "step": 129500 }, { "epoch": 2.39, "learning_rate": 3.803739693757362e-05, "loss": 0.0631, "step": 130000 }, { "epoch": 2.4, "learning_rate": 3.799138692579505e-05, "loss": 0.074, "step": 130500 }, { "epoch": 2.41, "learning_rate": 3.794537691401649e-05, "loss": 0.073, "step": 131000 }, { "epoch": 2.42, "learning_rate": 3.7899366902237925e-05, "loss": 0.0688, "step": 131500 }, { "epoch": 2.43, "learning_rate": 3.785335689045937e-05, "loss": 0.0688, "step": 132000 }, { "epoch": 2.44, "learning_rate": 3.780734687868081e-05, "loss": 0.064, "step": 132500 }, { "epoch": 2.45, "learning_rate": 3.776133686690224e-05, "loss": 0.0658, "step": 133000 }, { "epoch": 2.46, "learning_rate": 3.7715326855123676e-05, "loss": 0.0726, "step": 133500 }, { "epoch": 2.47, "learning_rate": 3.7669316843345114e-05, "loss": 0.0678, "step": 134000 }, { "epoch": 2.48, "learning_rate": 3.762330683156655e-05, "loss": 0.0704, "step": 134500 }, { "epoch": 2.48, "learning_rate": 3.757729681978799e-05, "loss": 0.0714, "step": 135000 }, { "epoch": 2.49, "learning_rate": 3.753128680800943e-05, "loss": 0.0678, "step": 135500 }, { "epoch": 2.5, "learning_rate": 3.748527679623086e-05, "loss": 0.0709, "step": 136000 }, { "epoch": 2.51, "learning_rate": 3.7439266784452296e-05, "loss": 0.0709, "step": 136500 }, { "epoch": 2.52, "learning_rate": 3.7393256772673734e-05, "loss": 0.0671, "step": 137000 }, { "epoch": 2.53, "learning_rate": 3.734724676089517e-05, "loss": 0.0724, "step": 137500 }, { "epoch": 2.54, "learning_rate": 3.730123674911661e-05, "loss": 0.068, "step": 138000 }, { "epoch": 2.55, "learning_rate": 3.725522673733805e-05, "loss": 0.0727, "step": 138500 }, { "epoch": 2.56, "learning_rate": 3.7209216725559485e-05, "loss": 0.0704, "step": 139000 }, { "epoch": 2.57, "learning_rate": 3.7163206713780916e-05, "loss": 0.0714, "step": 139500 }, { "epoch": 2.58, "learning_rate": 3.711719670200235e-05, "loss": 0.0634, "step": 140000 }, { "epoch": 2.59, "learning_rate": 3.70711866902238e-05, "loss": 0.0703, "step": 140500 }, { "epoch": 2.59, "learning_rate": 3.7025176678445236e-05, "loss": 0.069, "step": 141000 }, { "epoch": 2.6, "learning_rate": 3.697916666666667e-05, "loss": 0.0731, "step": 141500 }, { "epoch": 2.61, "learning_rate": 3.6933156654888104e-05, "loss": 0.0644, "step": 142000 }, { "epoch": 2.62, "learning_rate": 3.688714664310954e-05, "loss": 0.0685, "step": 142500 }, { "epoch": 2.63, "learning_rate": 3.684113663133098e-05, "loss": 0.072, "step": 143000 }, { "epoch": 2.64, "learning_rate": 3.679512661955242e-05, "loss": 0.075, "step": 143500 }, { "epoch": 2.65, "learning_rate": 3.6749116607773855e-05, "loss": 0.0674, "step": 144000 }, { "epoch": 2.66, "learning_rate": 3.670310659599529e-05, "loss": 0.0703, "step": 144500 }, { "epoch": 2.67, "learning_rate": 3.6657096584216724e-05, "loss": 0.0705, "step": 145000 }, { "epoch": 2.68, "learning_rate": 3.661108657243816e-05, "loss": 0.0729, "step": 145500 }, { "epoch": 2.69, "learning_rate": 3.65650765606596e-05, "loss": 0.067, "step": 146000 }, { "epoch": 2.7, "learning_rate": 3.651906654888104e-05, "loss": 0.0694, "step": 146500 }, { "epoch": 2.71, "learning_rate": 3.6473056537102475e-05, "loss": 0.0692, "step": 147000 }, { "epoch": 2.71, "learning_rate": 3.642704652532391e-05, "loss": 0.0709, "step": 147500 }, { "epoch": 2.72, "learning_rate": 3.6381036513545344e-05, "loss": 0.0681, "step": 148000 }, { "epoch": 2.73, "learning_rate": 3.633502650176678e-05, "loss": 0.0703, "step": 148500 }, { "epoch": 2.74, "learning_rate": 3.6289016489988226e-05, "loss": 0.0633, "step": 149000 }, { "epoch": 2.75, "learning_rate": 3.6243006478209664e-05, "loss": 0.0664, "step": 149500 }, { "epoch": 2.76, "learning_rate": 3.61969964664311e-05, "loss": 0.0694, "step": 150000 }, { "epoch": 2.77, "learning_rate": 3.615098645465253e-05, "loss": 0.0716, "step": 150500 }, { "epoch": 2.78, "learning_rate": 3.610497644287397e-05, "loss": 0.0702, "step": 151000 }, { "epoch": 2.79, "learning_rate": 3.605896643109541e-05, "loss": 0.0743, "step": 151500 }, { "epoch": 2.8, "learning_rate": 3.6012956419316846e-05, "loss": 0.0705, "step": 152000 }, { "epoch": 2.81, "learning_rate": 3.596694640753828e-05, "loss": 0.0667, "step": 152500 }, { "epoch": 2.82, "learning_rate": 3.592093639575972e-05, "loss": 0.0662, "step": 153000 }, { "epoch": 2.83, "learning_rate": 3.587492638398116e-05, "loss": 0.0725, "step": 153500 }, { "epoch": 2.83, "learning_rate": 3.582891637220259e-05, "loss": 0.0665, "step": 154000 }, { "epoch": 2.84, "learning_rate": 3.578290636042403e-05, "loss": 0.0683, "step": 154500 }, { "epoch": 2.85, "learning_rate": 3.5736896348645465e-05, "loss": 0.0712, "step": 155000 }, { "epoch": 2.86, "learning_rate": 3.56908863368669e-05, "loss": 0.0703, "step": 155500 }, { "epoch": 2.87, "learning_rate": 3.564487632508834e-05, "loss": 0.0695, "step": 156000 }, { "epoch": 2.88, "learning_rate": 3.559886631330978e-05, "loss": 0.0672, "step": 156500 }, { "epoch": 2.89, "learning_rate": 3.555285630153121e-05, "loss": 0.0673, "step": 157000 }, { "epoch": 2.9, "learning_rate": 3.5506846289752654e-05, "loss": 0.0661, "step": 157500 }, { "epoch": 2.91, "learning_rate": 3.546083627797409e-05, "loss": 0.0694, "step": 158000 }, { "epoch": 2.92, "learning_rate": 3.541482626619553e-05, "loss": 0.0668, "step": 158500 }, { "epoch": 2.93, "learning_rate": 3.536881625441697e-05, "loss": 0.0725, "step": 159000 }, { "epoch": 2.94, "learning_rate": 3.53228062426384e-05, "loss": 0.0661, "step": 159500 }, { "epoch": 2.94, "learning_rate": 3.5276796230859836e-05, "loss": 0.073, "step": 160000 } ], "max_steps": 543360, "num_train_epochs": 10, "total_flos": 6.6892572131328e+17, "trial_name": null, "trial_params": null }