{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 1229, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 5.405405405405406e-05, "loss": 1.1849, "step": 20 }, { "epoch": 0.03, "learning_rate": 9.99984371133188e-05, "loss": 1.049, "step": 40 }, { "epoch": 0.05, "learning_rate": 9.990816464160552e-05, "loss": 1.0047, "step": 60 }, { "epoch": 0.07, "learning_rate": 9.967925545267647e-05, "loss": 1.0005, "step": 80 }, { "epoch": 0.08, "learning_rate": 9.931234541881273e-05, "loss": 0.9733, "step": 100 }, { "epoch": 0.1, "learning_rate": 9.880845375606788e-05, "loss": 0.9806, "step": 120 }, { "epoch": 0.11, "learning_rate": 9.81689801930526e-05, "loss": 0.9689, "step": 140 }, { "epoch": 0.13, "learning_rate": 9.739570108271755e-05, "loss": 0.9475, "step": 160 }, { "epoch": 0.15, "learning_rate": 9.649076446793543e-05, "loss": 0.9538, "step": 180 }, { "epoch": 0.16, "learning_rate": 9.545668411458928e-05, "loss": 0.946, "step": 200 }, { "epoch": 0.18, "learning_rate": 9.42963325287421e-05, "loss": 1.2086, "step": 220 }, { "epoch": 0.2, "learning_rate": 9.301293297728486e-05, "loss": 0.956, "step": 240 }, { "epoch": 0.21, "learning_rate": 9.161005053422832e-05, "loss": 0.9414, "step": 260 }, { "epoch": 0.23, "learning_rate": 9.009158217751071e-05, "loss": 0.9282, "step": 280 }, { "epoch": 0.24, "learning_rate": 8.846174596383044e-05, "loss": 0.9125, "step": 300 }, { "epoch": 0.26, "learning_rate": 8.672506931157457e-05, "loss": 0.9019, "step": 320 }, { "epoch": 0.28, "learning_rate": 8.488637642439121e-05, "loss": 0.9036, "step": 340 }, { "epoch": 0.29, "learning_rate": 8.295077489034092e-05, "loss": 0.8963, "step": 360 }, { "epoch": 0.31, "learning_rate": 8.092364149385254e-05, "loss": 0.8834, "step": 380 }, { "epoch": 0.33, "learning_rate": 7.881060727989571e-05, "loss": 0.8846, "step": 400 }, { "epoch": 0.34, "learning_rate": 7.661754191185898e-05, "loss": 0.8722, "step": 420 }, { "epoch": 0.36, "learning_rate": 7.435053736658517e-05, "loss": 0.8661, "step": 440 }, { "epoch": 0.37, "learning_rate": 7.201589101185622e-05, "loss": 0.8648, "step": 460 }, { "epoch": 0.39, "learning_rate": 6.962008811333567e-05, "loss": 0.8414, "step": 480 }, { "epoch": 0.41, "learning_rate": 6.716978381956145e-05, "loss": 0.8499, "step": 500 }, { "epoch": 0.42, "learning_rate": 6.467178467503203e-05, "loss": 0.8432, "step": 520 }, { "epoch": 0.44, "learning_rate": 6.213302971273911e-05, "loss": 0.8425, "step": 540 }, { "epoch": 0.46, "learning_rate": 5.956057117866922e-05, "loss": 0.8231, "step": 560 }, { "epoch": 0.47, "learning_rate": 5.696155494181792e-05, "loss": 0.8331, "step": 580 }, { "epoch": 0.49, "learning_rate": 5.434320064413464e-05, "loss": 0.8198, "step": 600 }, { "epoch": 0.5, "learning_rate": 5.1712781645538485e-05, "loss": 0.8124, "step": 620 }, { "epoch": 0.52, "learning_rate": 4.9077604819714065e-05, "loss": 0.8078, "step": 640 }, { "epoch": 0.54, "learning_rate": 4.644499025681171e-05, "loss": 0.816, "step": 660 }, { "epoch": 0.55, "learning_rate": 4.382225092943455e-05, "loss": 0.7996, "step": 680 }, { "epoch": 0.57, "learning_rate": 4.121667237839683e-05, "loss": 0.8034, "step": 700 }, { "epoch": 0.59, "learning_rate": 3.863549247468331e-05, "loss": 0.7956, "step": 720 }, { "epoch": 0.6, "learning_rate": 3.6085881313827754e-05, "loss": 0.7855, "step": 740 }, { "epoch": 0.62, "learning_rate": 3.357492129856037e-05, "loss": 0.7896, "step": 760 }, { "epoch": 0.63, "learning_rate": 3.110958746505165e-05, "loss": 0.7968, "step": 780 }, { "epoch": 0.65, "learning_rate": 2.8696728107402794e-05, "loss": 0.7792, "step": 800 }, { "epoch": 0.67, "learning_rate": 2.6343045754205053e-05, "loss": 0.7691, "step": 820 }, { "epoch": 0.68, "learning_rate": 2.4055078550011774e-05, "loss": 0.7663, "step": 840 }, { "epoch": 0.7, "learning_rate": 2.183918209344215e-05, "loss": 0.7622, "step": 860 }, { "epoch": 0.72, "learning_rate": 1.970151178236821e-05, "loss": 0.7635, "step": 880 }, { "epoch": 0.73, "learning_rate": 1.764800571522622e-05, "loss": 0.7565, "step": 900 }, { "epoch": 0.75, "learning_rate": 1.568436819595077e-05, "loss": 0.7531, "step": 920 }, { "epoch": 0.76, "learning_rate": 1.3816053888351405e-05, "loss": 0.7566, "step": 940 }, { "epoch": 0.78, "learning_rate": 1.2048252663949039e-05, "loss": 0.7593, "step": 960 }, { "epoch": 0.8, "learning_rate": 1.038587518536172e-05, "loss": 0.7548, "step": 980 }, { "epoch": 0.81, "learning_rate": 8.833539265287217e-06, "loss": 0.7468, "step": 1000 }, { "epoch": 0.83, "learning_rate": 7.395557038974749e-06, "loss": 0.7507, "step": 1020 }, { "epoch": 0.85, "learning_rate": 6.075922985818683e-06, "loss": 0.7552, "step": 1040 }, { "epoch": 0.86, "learning_rate": 4.8783028333481525e-06, "loss": 0.7534, "step": 1060 }, { "epoch": 0.88, "learning_rate": 3.8060233744356633e-06, "loss": 0.7528, "step": 1080 }, { "epoch": 0.9, "learning_rate": 2.8620632260107195e-06, "loss": 0.7431, "step": 1100 }, { "epoch": 0.91, "learning_rate": 2.0490445549492197e-06, "loss": 0.7524, "step": 1120 }, { "epoch": 0.93, "learning_rate": 1.3692257941228193e-06, "loss": 0.7412, "step": 1140 }, { "epoch": 0.94, "learning_rate": 8.244953688418822e-07, "loss": 0.7275, "step": 1160 }, { "epoch": 0.96, "learning_rate": 4.1636645111886583e-07, "loss": 0.7526, "step": 1180 }, { "epoch": 0.98, "learning_rate": 1.4597275632398655e-07, "loss": 0.7362, "step": 1200 }, { "epoch": 0.99, "learning_rate": 1.4065393909407087e-08, "loss": 0.7286, "step": 1220 }, { "epoch": 1.0, "step": 1229, "total_flos": 257039699804160.0, "train_loss": 0.845892040975895, "train_runtime": 30624.5115, "train_samples_per_second": 2.567, "train_steps_per_second": 0.04 } ], "max_steps": 1229, "num_train_epochs": 1, "total_flos": 257039699804160.0, "trial_name": null, "trial_params": null }