{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 58011, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.956904724965955e-05, "loss": 1.7071, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.91380944993191e-05, "loss": 1.6086, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.8707141748978643e-05, "loss": 1.5292, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.8276188998638195e-05, "loss": 1.475, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.784523624829774e-05, "loss": 1.4358, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.7414283497957284e-05, "loss": 1.4158, "step": 3000 }, { "epoch": 0.18, "learning_rate": 4.6983330747616836e-05, "loss": 1.3844, "step": 3500 }, { "epoch": 0.21, "learning_rate": 4.655237799727638e-05, "loss": 1.3696, "step": 4000 }, { "epoch": 0.23, "learning_rate": 4.6121425246935925e-05, "loss": 1.33, "step": 4500 }, { "epoch": 0.26, "learning_rate": 4.569047249659548e-05, "loss": 1.2824, "step": 5000 }, { "epoch": 0.28, "learning_rate": 4.525951974625502e-05, "loss": 1.2842, "step": 5500 }, { "epoch": 0.31, "learning_rate": 4.482856699591457e-05, "loss": 1.282, "step": 6000 }, { "epoch": 0.34, "learning_rate": 4.439761424557412e-05, "loss": 1.2593, "step": 6500 }, { "epoch": 0.36, "learning_rate": 4.396666149523366e-05, "loss": 1.238, "step": 7000 }, { "epoch": 0.39, "learning_rate": 4.3535708744893214e-05, "loss": 1.2322, "step": 7500 }, { "epoch": 0.41, "learning_rate": 4.310475599455276e-05, "loss": 1.2107, "step": 8000 }, { "epoch": 0.44, "learning_rate": 4.2673803244212304e-05, "loss": 1.2007, "step": 8500 }, { "epoch": 0.47, "learning_rate": 4.2242850493871855e-05, "loss": 1.1947, "step": 9000 }, { "epoch": 0.49, "learning_rate": 4.18118977435314e-05, "loss": 1.1792, "step": 9500 }, { "epoch": 0.52, "learning_rate": 4.138094499319095e-05, "loss": 1.1771, "step": 10000 }, { "epoch": 0.54, "learning_rate": 4.0949992242850496e-05, "loss": 1.1634, "step": 10500 }, { "epoch": 0.57, "learning_rate": 4.051903949251004e-05, "loss": 1.1487, "step": 11000 }, { "epoch": 0.59, "learning_rate": 4.008808674216959e-05, "loss": 1.1607, "step": 11500 }, { "epoch": 0.62, "learning_rate": 3.965713399182914e-05, "loss": 1.1333, "step": 12000 }, { "epoch": 0.65, "learning_rate": 3.922618124148869e-05, "loss": 1.1344, "step": 12500 }, { "epoch": 0.67, "learning_rate": 3.8795228491148233e-05, "loss": 1.1319, "step": 13000 }, { "epoch": 0.7, "learning_rate": 3.836427574080778e-05, "loss": 1.1236, "step": 13500 }, { "epoch": 0.72, "learning_rate": 3.793332299046733e-05, "loss": 1.1088, "step": 14000 }, { "epoch": 0.75, "learning_rate": 3.7502370240126874e-05, "loss": 1.1135, "step": 14500 }, { "epoch": 0.78, "learning_rate": 3.707141748978642e-05, "loss": 1.0985, "step": 15000 }, { "epoch": 0.8, "learning_rate": 3.664046473944597e-05, "loss": 1.0886, "step": 15500 }, { "epoch": 0.83, "learning_rate": 3.6209511989105515e-05, "loss": 1.0961, "step": 16000 }, { "epoch": 0.85, "learning_rate": 3.577855923876507e-05, "loss": 1.0846, "step": 16500 }, { "epoch": 0.88, "learning_rate": 3.534760648842461e-05, "loss": 1.0638, "step": 17000 }, { "epoch": 0.91, "learning_rate": 3.4916653738084156e-05, "loss": 1.0757, "step": 17500 }, { "epoch": 0.93, "learning_rate": 3.448570098774371e-05, "loss": 1.0676, "step": 18000 }, { "epoch": 0.96, "learning_rate": 3.405474823740325e-05, "loss": 1.0547, "step": 18500 }, { "epoch": 0.98, "learning_rate": 3.36237954870628e-05, "loss": 1.0657, "step": 19000 }, { "epoch": 1.01, "learning_rate": 3.319284273672235e-05, "loss": 1.0545, "step": 19500 }, { "epoch": 1.03, "learning_rate": 3.2761889986381894e-05, "loss": 1.0325, "step": 20000 }, { "epoch": 1.06, "learning_rate": 3.2330937236041445e-05, "loss": 1.0263, "step": 20500 }, { "epoch": 1.09, "learning_rate": 3.189998448570099e-05, "loss": 1.0273, "step": 21000 }, { "epoch": 1.11, "learning_rate": 3.1469031735360535e-05, "loss": 1.0176, "step": 21500 }, { "epoch": 1.14, "learning_rate": 3.1038078985020086e-05, "loss": 1.0119, "step": 22000 }, { "epoch": 1.16, "learning_rate": 3.060712623467963e-05, "loss": 1.0184, "step": 22500 }, { "epoch": 1.19, "learning_rate": 3.0176173484339176e-05, "loss": 1.0226, "step": 23000 }, { "epoch": 1.22, "learning_rate": 2.9745220733998724e-05, "loss": 0.9991, "step": 23500 }, { "epoch": 1.24, "learning_rate": 2.9314267983658272e-05, "loss": 0.9955, "step": 24000 }, { "epoch": 1.27, "learning_rate": 2.888331523331782e-05, "loss": 0.9834, "step": 24500 }, { "epoch": 1.29, "learning_rate": 2.8452362482977368e-05, "loss": 0.9906, "step": 25000 }, { "epoch": 1.32, "learning_rate": 2.8021409732636916e-05, "loss": 0.988, "step": 25500 }, { "epoch": 1.34, "learning_rate": 2.7590456982296464e-05, "loss": 0.9742, "step": 26000 }, { "epoch": 1.37, "learning_rate": 2.7159504231956013e-05, "loss": 0.9712, "step": 26500 }, { "epoch": 1.4, "learning_rate": 2.6728551481615554e-05, "loss": 0.9755, "step": 27000 }, { "epoch": 1.42, "learning_rate": 2.6297598731275102e-05, "loss": 0.9807, "step": 27500 }, { "epoch": 1.45, "learning_rate": 2.586664598093465e-05, "loss": 0.9752, "step": 28000 }, { "epoch": 1.47, "learning_rate": 2.54356932305942e-05, "loss": 0.9714, "step": 28500 }, { "epoch": 1.5, "learning_rate": 2.5004740480253746e-05, "loss": 0.9579, "step": 29000 }, { "epoch": 1.53, "learning_rate": 2.4573787729913295e-05, "loss": 0.9462, "step": 29500 }, { "epoch": 1.55, "learning_rate": 2.414283497957284e-05, "loss": 0.9435, "step": 30000 }, { "epoch": 1.58, "learning_rate": 2.3711882229232387e-05, "loss": 0.9429, "step": 30500 }, { "epoch": 1.6, "learning_rate": 2.3280929478891936e-05, "loss": 0.9521, "step": 31000 }, { "epoch": 1.63, "learning_rate": 2.2849976728551484e-05, "loss": 0.9409, "step": 31500 }, { "epoch": 1.65, "learning_rate": 2.241902397821103e-05, "loss": 0.9434, "step": 32000 }, { "epoch": 1.68, "learning_rate": 2.1988071227870577e-05, "loss": 0.9357, "step": 32500 }, { "epoch": 1.71, "learning_rate": 2.1557118477530125e-05, "loss": 0.9256, "step": 33000 }, { "epoch": 1.73, "learning_rate": 2.1126165727189673e-05, "loss": 0.9379, "step": 33500 }, { "epoch": 1.76, "learning_rate": 2.0695212976849218e-05, "loss": 0.9346, "step": 34000 }, { "epoch": 1.78, "learning_rate": 2.0264260226508766e-05, "loss": 0.9238, "step": 34500 }, { "epoch": 1.81, "learning_rate": 1.9833307476168314e-05, "loss": 0.9266, "step": 35000 }, { "epoch": 1.84, "learning_rate": 1.9402354725827862e-05, "loss": 0.924, "step": 35500 }, { "epoch": 1.86, "learning_rate": 1.8971401975487407e-05, "loss": 0.9082, "step": 36000 }, { "epoch": 1.89, "learning_rate": 1.8540449225146955e-05, "loss": 0.9124, "step": 36500 }, { "epoch": 1.91, "learning_rate": 1.8109496474806503e-05, "loss": 0.9265, "step": 37000 }, { "epoch": 1.94, "learning_rate": 1.767854372446605e-05, "loss": 0.9153, "step": 37500 }, { "epoch": 1.97, "learning_rate": 1.7247590974125596e-05, "loss": 0.9056, "step": 38000 }, { "epoch": 1.99, "learning_rate": 1.6816638223785144e-05, "loss": 0.8929, "step": 38500 }, { "epoch": 2.02, "learning_rate": 1.6385685473444692e-05, "loss": 0.8964, "step": 39000 }, { "epoch": 2.04, "learning_rate": 1.595473272310424e-05, "loss": 0.8852, "step": 39500 }, { "epoch": 2.07, "learning_rate": 1.5523779972763785e-05, "loss": 0.8845, "step": 40000 }, { "epoch": 2.09, "learning_rate": 1.5092827222423333e-05, "loss": 0.9005, "step": 40500 }, { "epoch": 2.12, "learning_rate": 1.4661874472082881e-05, "loss": 0.8803, "step": 41000 }, { "epoch": 2.15, "learning_rate": 1.423092172174243e-05, "loss": 0.8864, "step": 41500 }, { "epoch": 2.17, "learning_rate": 1.3799968971401977e-05, "loss": 0.8821, "step": 42000 }, { "epoch": 2.2, "learning_rate": 1.3369016221061522e-05, "loss": 0.887, "step": 42500 }, { "epoch": 2.22, "learning_rate": 1.293806347072107e-05, "loss": 0.8738, "step": 43000 }, { "epoch": 2.25, "learning_rate": 1.2507110720380618e-05, "loss": 0.8776, "step": 43500 }, { "epoch": 2.28, "learning_rate": 1.2076157970040165e-05, "loss": 0.8589, "step": 44000 }, { "epoch": 2.3, "learning_rate": 1.1645205219699713e-05, "loss": 0.8543, "step": 44500 }, { "epoch": 2.33, "learning_rate": 1.121425246935926e-05, "loss": 0.8584, "step": 45000 }, { "epoch": 2.35, "learning_rate": 1.0783299719018808e-05, "loss": 0.8614, "step": 45500 }, { "epoch": 2.38, "learning_rate": 1.0352346968678354e-05, "loss": 0.8537, "step": 46000 }, { "epoch": 2.4, "learning_rate": 9.921394218337902e-06, "loss": 0.8625, "step": 46500 }, { "epoch": 2.43, "learning_rate": 9.490441467997449e-06, "loss": 0.8532, "step": 47000 }, { "epoch": 2.46, "learning_rate": 9.059488717656997e-06, "loss": 0.8559, "step": 47500 }, { "epoch": 2.48, "learning_rate": 8.628535967316543e-06, "loss": 0.8424, "step": 48000 }, { "epoch": 2.51, "learning_rate": 8.197583216976091e-06, "loss": 0.8475, "step": 48500 }, { "epoch": 2.53, "learning_rate": 7.766630466635638e-06, "loss": 0.8525, "step": 49000 }, { "epoch": 2.56, "learning_rate": 7.335677716295186e-06, "loss": 0.8457, "step": 49500 }, { "epoch": 2.59, "learning_rate": 6.904724965954733e-06, "loss": 0.8503, "step": 50000 }, { "epoch": 2.61, "learning_rate": 6.4737722156142804e-06, "loss": 0.8469, "step": 50500 }, { "epoch": 2.64, "learning_rate": 6.042819465273828e-06, "loss": 0.8416, "step": 51000 }, { "epoch": 2.66, "learning_rate": 5.611866714933375e-06, "loss": 0.8463, "step": 51500 }, { "epoch": 2.69, "learning_rate": 5.180913964592922e-06, "loss": 0.843, "step": 52000 }, { "epoch": 2.72, "learning_rate": 4.7499612142524696e-06, "loss": 0.8298, "step": 52500 }, { "epoch": 2.74, "learning_rate": 4.319008463912017e-06, "loss": 0.8335, "step": 53000 }, { "epoch": 2.77, "learning_rate": 3.888055713571564e-06, "loss": 0.8363, "step": 53500 }, { "epoch": 2.79, "learning_rate": 3.457102963231112e-06, "loss": 0.834, "step": 54000 }, { "epoch": 2.82, "learning_rate": 3.0261502128906587e-06, "loss": 0.8289, "step": 54500 }, { "epoch": 2.84, "learning_rate": 2.595197462550206e-06, "loss": 0.8384, "step": 55000 }, { "epoch": 2.87, "learning_rate": 2.1642447122097533e-06, "loss": 0.8255, "step": 55500 }, { "epoch": 2.9, "learning_rate": 1.7332919618693005e-06, "loss": 0.825, "step": 56000 }, { "epoch": 2.92, "learning_rate": 1.302339211528848e-06, "loss": 0.8398, "step": 56500 }, { "epoch": 2.95, "learning_rate": 8.713864611883953e-07, "loss": 0.8264, "step": 57000 }, { "epoch": 2.97, "learning_rate": 4.4043371084794265e-07, "loss": 0.8196, "step": 57500 }, { "epoch": 3.0, "learning_rate": 9.48096050748996e-09, "loss": 0.8307, "step": 58000 }, { "epoch": 3.0, "step": 58011, "total_flos": 5.4106824484952474e+17, "train_loss": 1.0139114455800622, "train_runtime": 59211.1926, "train_samples_per_second": 9.797, "train_steps_per_second": 0.98 } ], "max_steps": 58011, "num_train_epochs": 3, "total_flos": 5.4106824484952474e+17, "trial_name": null, "trial_params": null }