|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 60120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.958416500332668e-05, |
|
"loss": 2.6853, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.916833000665336e-05, |
|
"loss": 2.3888, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8752495009980045e-05, |
|
"loss": 2.4518, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.833666001330672e-05, |
|
"loss": 2.4134, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.79208250166334e-05, |
|
"loss": 2.2574, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.750499001996008e-05, |
|
"loss": 2.1327, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7089155023286765e-05, |
|
"loss": 2.2362, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6673320026613444e-05, |
|
"loss": 2.1094, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.625748502994012e-05, |
|
"loss": 2.1622, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.58416500332668e-05, |
|
"loss": 2.0353, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5425815036593486e-05, |
|
"loss": 2.0771, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.5009980039920164e-05, |
|
"loss": 2.1499, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.459414504324684e-05, |
|
"loss": 2.1235, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.417831004657352e-05, |
|
"loss": 2.061, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.3762475049900207e-05, |
|
"loss": 2.066, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3346640053226885e-05, |
|
"loss": 2.055, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2930805056553564e-05, |
|
"loss": 2.012, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.251497005988024e-05, |
|
"loss": 2.0656, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.209913506320692e-05, |
|
"loss": 2.1063, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1683300066533606e-05, |
|
"loss": 2.007, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.1267465069860284e-05, |
|
"loss": 2.0566, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.085163007318696e-05, |
|
"loss": 2.0895, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.043579507651364e-05, |
|
"loss": 1.9675, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.0019960079840326e-05, |
|
"loss": 2.0837, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.9604125083167e-05, |
|
"loss": 1.964, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.9188290086493677e-05, |
|
"loss": 2.024, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.877245508982036e-05, |
|
"loss": 1.9672, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.835662009314704e-05, |
|
"loss": 1.9733, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.794078509647372e-05, |
|
"loss": 2.0202, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.75249500998004e-05, |
|
"loss": 1.9932, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.7109115103127076e-05, |
|
"loss": 1.956, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.669328010645376e-05, |
|
"loss": 1.9439, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.627744510978044e-05, |
|
"loss": 2.024, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.586161011310712e-05, |
|
"loss": 1.9913, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.5445775116433796e-05, |
|
"loss": 1.8802, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.502994011976048e-05, |
|
"loss": 1.9544, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.461410512308716e-05, |
|
"loss": 1.9787, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.419827012641384e-05, |
|
"loss": 1.81, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.378243512974052e-05, |
|
"loss": 1.8795, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.33666001330672e-05, |
|
"loss": 1.9943, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.295076513639388e-05, |
|
"loss": 1.9593, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.253493013972056e-05, |
|
"loss": 1.8693, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.211909514304724e-05, |
|
"loss": 1.8688, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.170326014637392e-05, |
|
"loss": 1.9299, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.12874251497006e-05, |
|
"loss": 1.9862, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.087159015302728e-05, |
|
"loss": 1.9493, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.045575515635396e-05, |
|
"loss": 1.8195, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.003992015968064e-05, |
|
"loss": 2.0447, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.962408516300732e-05, |
|
"loss": 1.9297, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.9208250166334e-05, |
|
"loss": 1.8736, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.879241516966068e-05, |
|
"loss": 1.7594, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.837658017298736e-05, |
|
"loss": 1.913, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.796074517631404e-05, |
|
"loss": 1.8812, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.754491017964072e-05, |
|
"loss": 1.7825, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.71290751829674e-05, |
|
"loss": 1.9509, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.671324018629408e-05, |
|
"loss": 1.8853, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.629740518962076e-05, |
|
"loss": 1.9285, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.5881570192947442e-05, |
|
"loss": 1.9212, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.546573519627412e-05, |
|
"loss": 1.8891, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.5049900199600802e-05, |
|
"loss": 1.9006, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.4634065202927477e-05, |
|
"loss": 1.9119, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.421823020625416e-05, |
|
"loss": 1.8479, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.3802395209580838e-05, |
|
"loss": 1.9032, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.338656021290752e-05, |
|
"loss": 1.9279, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.2970725216234198e-05, |
|
"loss": 1.8265, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.255489021956088e-05, |
|
"loss": 1.8167, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.2139055222887558e-05, |
|
"loss": 1.9173, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.172322022621424e-05, |
|
"loss": 1.7094, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.130738522954092e-05, |
|
"loss": 1.8966, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.08915502328676e-05, |
|
"loss": 1.9069, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.047571523619428e-05, |
|
"loss": 1.8751, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.0059880239520957e-05, |
|
"loss": 1.8776, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.964404524284764e-05, |
|
"loss": 1.907, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.9228210246174318e-05, |
|
"loss": 1.8639, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.8812375249501e-05, |
|
"loss": 1.8729, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.8396540252827678e-05, |
|
"loss": 1.8241, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.798070525615436e-05, |
|
"loss": 1.7862, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.756487025948104e-05, |
|
"loss": 1.8483, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.714903526280772e-05, |
|
"loss": 1.9772, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.67332002661344e-05, |
|
"loss": 1.825, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.631736526946108e-05, |
|
"loss": 1.9203, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.590153027278776e-05, |
|
"loss": 1.8294, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.548569527611444e-05, |
|
"loss": 1.7971, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.506986027944112e-05, |
|
"loss": 1.7856, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.4654025282767798e-05, |
|
"loss": 1.834, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.4238190286094476e-05, |
|
"loss": 1.7484, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.3822355289421157e-05, |
|
"loss": 1.7906, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.3406520292747837e-05, |
|
"loss": 1.8818, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.2990685296074517e-05, |
|
"loss": 1.9182, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.2574850299401197e-05, |
|
"loss": 1.8451, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.2159015302727877e-05, |
|
"loss": 1.8165, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.1743180306054557e-05, |
|
"loss": 1.8486, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.1327345309381238e-05, |
|
"loss": 1.6533, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.0911510312707918e-05, |
|
"loss": 1.7231, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.0495675316034598e-05, |
|
"loss": 1.884, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.0079840319361278e-05, |
|
"loss": 1.7961, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 9.664005322687958e-06, |
|
"loss": 1.8359, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.248170326014638e-06, |
|
"loss": 1.7858, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 8.832335329341319e-06, |
|
"loss": 1.8043, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.416500332667999e-06, |
|
"loss": 1.9039, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.000665335994679e-06, |
|
"loss": 1.7695, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.584830339321358e-06, |
|
"loss": 1.8137, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.168995342648038e-06, |
|
"loss": 1.7972, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 6.753160345974717e-06, |
|
"loss": 1.8118, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 6.337325349301397e-06, |
|
"loss": 1.8511, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 5.921490352628078e-06, |
|
"loss": 1.8975, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 5.505655355954757e-06, |
|
"loss": 1.7506, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 5.0898203592814375e-06, |
|
"loss": 1.7564, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 4.673985362608118e-06, |
|
"loss": 1.9306, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.258150365934797e-06, |
|
"loss": 1.7743, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.842315369261477e-06, |
|
"loss": 1.8154, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 3.4264803725881573e-06, |
|
"loss": 1.7763, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.010645375914837e-06, |
|
"loss": 1.7074, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.594810379241517e-06, |
|
"loss": 1.8081, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.178975382568197e-06, |
|
"loss": 1.7563, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.7631403858948771e-06, |
|
"loss": 1.818, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.3473053892215569e-06, |
|
"loss": 1.7706, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 9.314703925482368e-07, |
|
"loss": 1.8389, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 5.156353958749169e-07, |
|
"loss": 1.8149, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 9.98003992015968e-08, |
|
"loss": 1.7843, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 60120, |
|
"total_flos": 3.025664452067328e+16, |
|
"train_runtime": 9052.4232, |
|
"train_samples_per_second": 6.641 |
|
} |
|
], |
|
"max_steps": 60120, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.025664452067328e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|