{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.325045768617493, "global_step": 15500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.952741020793951e-05, "loss": 2.3854, "step": 100 }, { "epoch": 0.09, "learning_rate": 9.905482041587902e-05, "loss": 2.3565, "step": 200 }, { "epoch": 0.14, "learning_rate": 9.858223062381853e-05, "loss": 2.288, "step": 300 }, { "epoch": 0.19, "learning_rate": 9.810964083175804e-05, "loss": 2.2879, "step": 400 }, { "epoch": 0.24, "learning_rate": 9.763705103969755e-05, "loss": 2.3212, "step": 500 }, { "epoch": 0.28, "learning_rate": 9.716446124763706e-05, "loss": 2.2266, "step": 600 }, { "epoch": 0.33, "learning_rate": 9.669187145557656e-05, "loss": 2.2894, "step": 700 }, { "epoch": 0.38, "learning_rate": 9.621928166351607e-05, "loss": 2.2738, "step": 800 }, { "epoch": 0.43, "learning_rate": 9.574669187145558e-05, "loss": 2.3342, "step": 900 }, { "epoch": 0.47, "learning_rate": 9.527410207939509e-05, "loss": 2.2613, "step": 1000 }, { "epoch": 0.52, "learning_rate": 9.48015122873346e-05, "loss": 2.1509, "step": 1100 }, { "epoch": 0.57, "learning_rate": 9.432892249527411e-05, "loss": 2.2683, "step": 1200 }, { "epoch": 0.61, "learning_rate": 9.385633270321362e-05, "loss": 2.2501, "step": 1300 }, { "epoch": 0.66, "learning_rate": 9.338374291115312e-05, "loss": 2.2351, "step": 1400 }, { "epoch": 0.71, "learning_rate": 9.291115311909263e-05, "loss": 2.2134, "step": 1500 }, { "epoch": 0.76, "learning_rate": 9.243856332703214e-05, "loss": 2.2317, "step": 1600 }, { "epoch": 0.8, "learning_rate": 9.196597353497165e-05, "loss": 2.226, "step": 1700 }, { "epoch": 0.85, "learning_rate": 9.149338374291116e-05, "loss": 2.1991, "step": 1800 }, { "epoch": 0.9, "learning_rate": 9.102079395085067e-05, "loss": 2.1648, "step": 1900 }, { "epoch": 0.94, "learning_rate": 9.054820415879018e-05, "loss": 2.1412, "step": 2000 }, { "epoch": 0.99, "learning_rate": 9.007561436672968e-05, "loss": 2.2009, "step": 2100 }, { "epoch": 1.04, "learning_rate": 8.960302457466919e-05, "loss": 2.205, "step": 2200 }, { "epoch": 1.09, "learning_rate": 8.91304347826087e-05, "loss": 2.1226, "step": 2300 }, { "epoch": 1.13, "learning_rate": 8.865784499054821e-05, "loss": 2.1963, "step": 2400 }, { "epoch": 1.18, "learning_rate": 8.818525519848772e-05, "loss": 2.0325, "step": 2500 }, { "epoch": 1.23, "learning_rate": 8.771266540642723e-05, "loss": 2.1478, "step": 2600 }, { "epoch": 1.28, "learning_rate": 8.724007561436674e-05, "loss": 2.1116, "step": 2700 }, { "epoch": 1.32, "learning_rate": 8.676748582230624e-05, "loss": 2.1805, "step": 2800 }, { "epoch": 1.37, "learning_rate": 8.629489603024575e-05, "loss": 2.109, "step": 2900 }, { "epoch": 1.42, "learning_rate": 8.582230623818526e-05, "loss": 2.1156, "step": 3000 }, { "epoch": 1.46, "learning_rate": 8.534971644612477e-05, "loss": 2.152, "step": 3100 }, { "epoch": 1.51, "learning_rate": 8.487712665406428e-05, "loss": 2.1512, "step": 3200 }, { "epoch": 1.56, "learning_rate": 8.440453686200379e-05, "loss": 2.1396, "step": 3300 }, { "epoch": 1.61, "learning_rate": 8.39319470699433e-05, "loss": 2.0951, "step": 3400 }, { "epoch": 1.65, "learning_rate": 8.34593572778828e-05, "loss": 2.1533, "step": 3500 }, { "epoch": 1.7, "learning_rate": 8.298676748582231e-05, "loss": 2.0959, "step": 3600 }, { "epoch": 1.75, "learning_rate": 8.251417769376182e-05, "loss": 2.1345, "step": 3700 }, { "epoch": 1.8, "learning_rate": 8.204158790170132e-05, "loss": 2.0643, "step": 3800 }, { "epoch": 1.84, "learning_rate": 8.156899810964084e-05, "loss": 2.0566, "step": 3900 }, { "epoch": 1.89, "learning_rate": 8.109640831758035e-05, "loss": 2.0729, "step": 4000 }, { "epoch": 1.94, "learning_rate": 8.062381852551986e-05, "loss": 2.0768, "step": 4100 }, { "epoch": 1.98, "learning_rate": 8.015122873345936e-05, "loss": 2.1026, "step": 4200 }, { "epoch": 2.03, "learning_rate": 7.967863894139886e-05, "loss": 2.0804, "step": 4300 }, { "epoch": 2.08, "learning_rate": 7.920604914933838e-05, "loss": 2.0875, "step": 4400 }, { "epoch": 2.13, "learning_rate": 7.873345935727789e-05, "loss": 2.039, "step": 4500 }, { "epoch": 2.17, "learning_rate": 7.82608695652174e-05, "loss": 2.0605, "step": 4600 }, { "epoch": 2.22, "learning_rate": 7.77882797731569e-05, "loss": 2.0495, "step": 4700 }, { "epoch": 2.27, "learning_rate": 7.731568998109642e-05, "loss": 2.0146, "step": 4800 }, { "epoch": 2.32, "learning_rate": 7.684310018903592e-05, "loss": 2.0378, "step": 4900 }, { "epoch": 2.36, "learning_rate": 7.637051039697543e-05, "loss": 2.0806, "step": 5000 }, { "epoch": 2.41, "learning_rate": 7.589792060491494e-05, "loss": 2.0675, "step": 5100 }, { "epoch": 2.46, "learning_rate": 7.542533081285445e-05, "loss": 2.0247, "step": 5200 }, { "epoch": 2.5, "learning_rate": 7.495274102079396e-05, "loss": 2.0442, "step": 5300 }, { "epoch": 2.55, "learning_rate": 7.448015122873347e-05, "loss": 2.0232, "step": 5400 }, { "epoch": 2.6, "learning_rate": 7.400756143667296e-05, "loss": 2.0648, "step": 5500 }, { "epoch": 2.65, "learning_rate": 7.353497164461248e-05, "loss": 2.0579, "step": 5600 }, { "epoch": 2.69, "learning_rate": 7.306238185255199e-05, "loss": 2.0438, "step": 5700 }, { "epoch": 2.74, "learning_rate": 7.25897920604915e-05, "loss": 2.0849, "step": 5800 }, { "epoch": 2.79, "learning_rate": 7.211720226843101e-05, "loss": 1.9828, "step": 5900 }, { "epoch": 2.84, "learning_rate": 7.16446124763705e-05, "loss": 1.9923, "step": 6000 }, { "epoch": 2.88, "learning_rate": 7.117202268431003e-05, "loss": 1.9898, "step": 6100 }, { "epoch": 2.93, "learning_rate": 7.069943289224953e-05, "loss": 2.0663, "step": 6200 }, { "epoch": 2.98, "learning_rate": 7.022684310018904e-05, "loss": 1.9967, "step": 6300 }, { "epoch": 3.02, "learning_rate": 6.975425330812855e-05, "loss": 1.9941, "step": 6400 }, { "epoch": 3.07, "learning_rate": 6.928166351606805e-05, "loss": 1.9458, "step": 6500 }, { "epoch": 3.12, "learning_rate": 6.880907372400757e-05, "loss": 2.0411, "step": 6600 }, { "epoch": 3.17, "learning_rate": 6.833648393194706e-05, "loss": 1.9634, "step": 6700 }, { "epoch": 3.21, "learning_rate": 6.786389413988659e-05, "loss": 1.9897, "step": 6800 }, { "epoch": 3.26, "learning_rate": 6.73913043478261e-05, "loss": 1.9798, "step": 6900 }, { "epoch": 3.31, "learning_rate": 6.691871455576559e-05, "loss": 2.0127, "step": 7000 }, { "epoch": 3.36, "learning_rate": 6.644612476370511e-05, "loss": 1.9827, "step": 7100 }, { "epoch": 3.4, "learning_rate": 6.59735349716446e-05, "loss": 1.9949, "step": 7200 }, { "epoch": 3.45, "learning_rate": 6.550094517958413e-05, "loss": 2.0152, "step": 7300 }, { "epoch": 3.5, "learning_rate": 6.502835538752364e-05, "loss": 1.959, "step": 7400 }, { "epoch": 3.54, "learning_rate": 6.455576559546313e-05, "loss": 1.9876, "step": 7500 }, { "epoch": 3.59, "learning_rate": 6.408317580340265e-05, "loss": 1.9955, "step": 7600 }, { "epoch": 3.64, "learning_rate": 6.361058601134215e-05, "loss": 1.9812, "step": 7700 }, { "epoch": 3.69, "learning_rate": 6.313799621928167e-05, "loss": 1.9219, "step": 7800 }, { "epoch": 3.73, "learning_rate": 6.266540642722118e-05, "loss": 1.9576, "step": 7900 }, { "epoch": 3.78, "learning_rate": 6.219281663516069e-05, "loss": 1.9608, "step": 8000 }, { "epoch": 3.83, "learning_rate": 6.17202268431002e-05, "loss": 1.9916, "step": 8100 }, { "epoch": 3.87, "learning_rate": 6.124763705103969e-05, "loss": 2.0394, "step": 8200 }, { "epoch": 3.92, "learning_rate": 6.0775047258979214e-05, "loss": 1.9416, "step": 8300 }, { "epoch": 3.97, "learning_rate": 6.0302457466918716e-05, "loss": 2.0283, "step": 8400 }, { "epoch": 4.02, "learning_rate": 5.982986767485823e-05, "loss": 2.0244, "step": 8500 }, { "epoch": 4.06, "learning_rate": 5.935727788279773e-05, "loss": 1.9132, "step": 8600 }, { "epoch": 4.11, "learning_rate": 5.888468809073724e-05, "loss": 1.9917, "step": 8700 }, { "epoch": 4.16, "learning_rate": 5.841209829867676e-05, "loss": 1.9355, "step": 8800 }, { "epoch": 4.21, "learning_rate": 5.793950850661626e-05, "loss": 1.9248, "step": 8900 }, { "epoch": 4.25, "learning_rate": 5.7466918714555774e-05, "loss": 2.0129, "step": 9000 }, { "epoch": 4.3, "learning_rate": 5.6994328922495276e-05, "loss": 1.9497, "step": 9100 }, { "epoch": 4.35, "learning_rate": 5.652173913043478e-05, "loss": 1.8806, "step": 9200 }, { "epoch": 4.39, "learning_rate": 5.604914933837429e-05, "loss": 1.9262, "step": 9300 }, { "epoch": 4.44, "learning_rate": 5.55765595463138e-05, "loss": 2.0024, "step": 9400 }, { "epoch": 4.49, "learning_rate": 5.510396975425332e-05, "loss": 1.9407, "step": 9500 }, { "epoch": 4.54, "learning_rate": 5.463137996219282e-05, "loss": 1.991, "step": 9600 }, { "epoch": 4.58, "learning_rate": 5.415879017013232e-05, "loss": 1.964, "step": 9700 }, { "epoch": 4.63, "learning_rate": 5.3686200378071836e-05, "loss": 1.9025, "step": 9800 }, { "epoch": 4.68, "learning_rate": 5.3213610586011344e-05, "loss": 1.9489, "step": 9900 }, { "epoch": 4.73, "learning_rate": 5.274102079395086e-05, "loss": 1.942, "step": 10000 }, { "epoch": 4.77, "learning_rate": 5.226843100189036e-05, "loss": 1.9868, "step": 10100 }, { "epoch": 4.82, "learning_rate": 5.179584120982986e-05, "loss": 1.9231, "step": 10200 }, { "epoch": 4.87, "learning_rate": 5.132325141776938e-05, "loss": 1.9625, "step": 10300 }, { "epoch": 4.91, "learning_rate": 5.085066162570889e-05, "loss": 1.9306, "step": 10400 }, { "epoch": 4.96, "learning_rate": 5.03780718336484e-05, "loss": 1.9106, "step": 10500 }, { "epoch": 5.01, "learning_rate": 4.9905482041587904e-05, "loss": 1.9209, "step": 10600 }, { "epoch": 5.06, "learning_rate": 4.943289224952741e-05, "loss": 1.925, "step": 10700 }, { "epoch": 5.1, "learning_rate": 4.896030245746692e-05, "loss": 1.9368, "step": 10800 }, { "epoch": 5.15, "learning_rate": 4.848771266540643e-05, "loss": 1.9076, "step": 10900 }, { "epoch": 5.2, "learning_rate": 4.801512287334594e-05, "loss": 1.8892, "step": 11000 }, { "epoch": 5.25, "learning_rate": 4.754253308128545e-05, "loss": 1.9667, "step": 11100 }, { "epoch": 5.29, "learning_rate": 4.7069943289224955e-05, "loss": 1.9371, "step": 11200 }, { "epoch": 5.34, "learning_rate": 4.6597353497164464e-05, "loss": 1.8549, "step": 11300 }, { "epoch": 5.39, "learning_rate": 4.612476370510397e-05, "loss": 1.9157, "step": 11400 }, { "epoch": 5.43, "learning_rate": 4.565217391304348e-05, "loss": 1.9051, "step": 11500 }, { "epoch": 5.48, "learning_rate": 4.517958412098299e-05, "loss": 1.9175, "step": 11600 }, { "epoch": 5.53, "learning_rate": 4.47069943289225e-05, "loss": 1.9367, "step": 11700 }, { "epoch": 5.58, "learning_rate": 4.423440453686201e-05, "loss": 1.8351, "step": 11800 }, { "epoch": 5.62, "learning_rate": 4.3761814744801515e-05, "loss": 1.8961, "step": 11900 }, { "epoch": 5.67, "learning_rate": 4.3289224952741024e-05, "loss": 1.8583, "step": 12000 }, { "epoch": 5.72, "learning_rate": 4.281663516068053e-05, "loss": 1.8909, "step": 12100 }, { "epoch": 5.77, "learning_rate": 4.234404536862004e-05, "loss": 1.8642, "step": 12200 }, { "epoch": 5.81, "learning_rate": 4.187145557655955e-05, "loss": 1.9229, "step": 12300 }, { "epoch": 5.86, "learning_rate": 4.139886578449906e-05, "loss": 1.9524, "step": 12400 }, { "epoch": 5.91, "learning_rate": 4.0926275992438567e-05, "loss": 1.9143, "step": 12500 }, { "epoch": 5.95, "learning_rate": 4.045368620037807e-05, "loss": 1.9229, "step": 12600 }, { "epoch": 6.0, "learning_rate": 3.9981096408317584e-05, "loss": 1.9368, "step": 12700 }, { "epoch": 6.05, "learning_rate": 3.950850661625709e-05, "loss": 1.9183, "step": 12800 }, { "epoch": 6.1, "learning_rate": 3.90359168241966e-05, "loss": 1.8864, "step": 12900 }, { "epoch": 6.14, "learning_rate": 3.856332703213611e-05, "loss": 1.8453, "step": 13000 }, { "epoch": 6.19, "learning_rate": 3.809073724007561e-05, "loss": 1.8785, "step": 13100 }, { "epoch": 6.24, "learning_rate": 3.7618147448015126e-05, "loss": 1.9145, "step": 13200 }, { "epoch": 6.29, "learning_rate": 3.7145557655954635e-05, "loss": 1.9477, "step": 13300 }, { "epoch": 6.33, "learning_rate": 3.6672967863894143e-05, "loss": 1.8378, "step": 13400 }, { "epoch": 6.38, "learning_rate": 3.620037807183365e-05, "loss": 1.843, "step": 13500 }, { "epoch": 6.43, "learning_rate": 3.572778827977316e-05, "loss": 1.8386, "step": 13600 }, { "epoch": 6.47, "learning_rate": 3.525519848771266e-05, "loss": 1.9, "step": 13700 }, { "epoch": 6.52, "learning_rate": 3.478260869565218e-05, "loss": 1.8437, "step": 13800 }, { "epoch": 6.57, "learning_rate": 3.4310018903591686e-05, "loss": 1.9301, "step": 13900 }, { "epoch": 6.62, "learning_rate": 3.3837429111531195e-05, "loss": 1.8447, "step": 14000 }, { "epoch": 6.66, "learning_rate": 3.33648393194707e-05, "loss": 1.8811, "step": 14100 }, { "epoch": 6.71, "learning_rate": 3.2892249527410205e-05, "loss": 1.8775, "step": 14200 }, { "epoch": 6.76, "learning_rate": 3.2419659735349714e-05, "loss": 1.9123, "step": 14300 }, { "epoch": 6.81, "learning_rate": 3.194706994328923e-05, "loss": 1.8236, "step": 14400 }, { "epoch": 6.85, "learning_rate": 3.147448015122874e-05, "loss": 1.8434, "step": 14500 }, { "epoch": 6.9, "learning_rate": 3.1001890359168246e-05, "loss": 1.8975, "step": 14600 }, { "epoch": 6.95, "learning_rate": 3.052930056710775e-05, "loss": 1.887, "step": 14700 }, { "epoch": 6.99, "learning_rate": 3.005671077504726e-05, "loss": 1.8665, "step": 14800 }, { "epoch": 7.04, "learning_rate": 2.9584120982986768e-05, "loss": 1.9142, "step": 14900 }, { "epoch": 7.09, "learning_rate": 2.9111531190926277e-05, "loss": 1.8892, "step": 15000 }, { "epoch": 7.14, "learning_rate": 2.863894139886579e-05, "loss": 1.8762, "step": 15100 }, { "epoch": 7.18, "learning_rate": 2.8166351606805297e-05, "loss": 1.8169, "step": 15200 }, { "epoch": 7.23, "learning_rate": 2.7693761814744802e-05, "loss": 1.8673, "step": 15300 }, { "epoch": 7.28, "learning_rate": 2.722117202268431e-05, "loss": 1.8615, "step": 15400 }, { "epoch": 7.33, "learning_rate": 2.674858223062382e-05, "loss": 1.8072, "step": 15500 } ], "max_steps": 21160, "num_train_epochs": 10, "total_flos": 1.1710826687824896e+17, "trial_name": null, "trial_params": null }