|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3022602648120904, |
|
"global_step": 113000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9973251303998932e-05, |
|
"loss": 3.5144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9946502607997863e-05, |
|
"loss": 2.2536, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.991975391199679e-05, |
|
"loss": 2.0076, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9893005215995724e-05, |
|
"loss": 1.77, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.986625651999465e-05, |
|
"loss": 1.6057, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.983950782399358e-05, |
|
"loss": 1.4921, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.981275912799251e-05, |
|
"loss": 1.4723, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9786010431991442e-05, |
|
"loss": 1.3909, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9759261735990373e-05, |
|
"loss": 1.3073, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9732513039989303e-05, |
|
"loss": 1.1996, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.970576434398823e-05, |
|
"loss": 1.1979, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9679015647987164e-05, |
|
"loss": 1.1541, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.965226695198609e-05, |
|
"loss": 1.1183, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.962551825598502e-05, |
|
"loss": 1.0842, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9598769559983952e-05, |
|
"loss": 1.0402, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9572020863982883e-05, |
|
"loss": 1.0074, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9545272167981813e-05, |
|
"loss": 1.0338, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.951852347198074e-05, |
|
"loss": 0.9924, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9491774775979674e-05, |
|
"loss": 0.9631, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.94650260799786e-05, |
|
"loss": 0.9394, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.943827738397753e-05, |
|
"loss": 0.9232, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9411528687976462e-05, |
|
"loss": 0.8787, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9384779991975393e-05, |
|
"loss": 0.8995, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9358031295974323e-05, |
|
"loss": 0.8935, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9331282599973254e-05, |
|
"loss": 0.8847, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9304533903972184e-05, |
|
"loss": 0.8572, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9277785207971115e-05, |
|
"loss": 0.8075, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.925103651197004e-05, |
|
"loss": 0.8435, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9224287815968975e-05, |
|
"loss": 0.8183, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9197539119967903e-05, |
|
"loss": 0.8101, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9170790423966833e-05, |
|
"loss": 0.7885, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9144041727965764e-05, |
|
"loss": 0.7753, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9117293031964694e-05, |
|
"loss": 0.797, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9090544335963624e-05, |
|
"loss": 0.784, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.906379563996255e-05, |
|
"loss": 0.7849, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9037046943961482e-05, |
|
"loss": 0.7358, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9010298247960413e-05, |
|
"loss": 0.7414, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8983549551959343e-05, |
|
"loss": 0.7606, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8956800855958274e-05, |
|
"loss": 0.7404, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8930052159957204e-05, |
|
"loss": 0.7261, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8903303463956134e-05, |
|
"loss": 0.7436, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8876554767955065e-05, |
|
"loss": 0.7183, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8849806071953992e-05, |
|
"loss": 0.7043, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8823057375952926e-05, |
|
"loss": 0.7424, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8796308679951853e-05, |
|
"loss": 0.7286, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8769559983950784e-05, |
|
"loss": 0.7385, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8742811287949714e-05, |
|
"loss": 0.7158, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8716062591948644e-05, |
|
"loss": 0.7042, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8689313895947575e-05, |
|
"loss": 0.6892, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8662565199946505e-05, |
|
"loss": 0.729, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8635816503945433e-05, |
|
"loss": 0.6994, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8609067807944363e-05, |
|
"loss": 0.6768, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8582319111943294e-05, |
|
"loss": 0.6858, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8555570415942224e-05, |
|
"loss": 0.693, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8528821719941154e-05, |
|
"loss": 0.6863, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8502073023940085e-05, |
|
"loss": 0.6703, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8475324327939015e-05, |
|
"loss": 0.6785, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8448575631937943e-05, |
|
"loss": 0.6847, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8421826935936876e-05, |
|
"loss": 0.6583, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8395078239935803e-05, |
|
"loss": 0.6659, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8368329543934734e-05, |
|
"loss": 0.7055, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8341580847933664e-05, |
|
"loss": 0.6751, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8314832151932595e-05, |
|
"loss": 0.6628, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8288083455931525e-05, |
|
"loss": 0.6568, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8261334759930456e-05, |
|
"loss": 0.694, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8234586063929386e-05, |
|
"loss": 0.663, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8207837367928317e-05, |
|
"loss": 0.6523, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8181088671927244e-05, |
|
"loss": 0.6485, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8154339975926174e-05, |
|
"loss": 0.6472, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8127591279925105e-05, |
|
"loss": 0.6857, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8100842583924035e-05, |
|
"loss": 0.6471, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8074093887922966e-05, |
|
"loss": 0.6687, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8047345191921893e-05, |
|
"loss": 0.6438, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8020596495920827e-05, |
|
"loss": 0.6579, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7993847799919754e-05, |
|
"loss": 0.657, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7967099103918684e-05, |
|
"loss": 0.6459, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7940350407917615e-05, |
|
"loss": 0.644, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7913601711916545e-05, |
|
"loss": 0.6419, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7886853015915476e-05, |
|
"loss": 0.6392, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7860104319914406e-05, |
|
"loss": 0.6446, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7833355623913337e-05, |
|
"loss": 0.6546, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7806606927912267e-05, |
|
"loss": 0.6212, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7779858231911194e-05, |
|
"loss": 0.6387, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7753109535910128e-05, |
|
"loss": 0.6368, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7726360839909055e-05, |
|
"loss": 0.6526, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7699612143907986e-05, |
|
"loss": 0.6342, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7672863447906916e-05, |
|
"loss": 0.6234, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7646114751905843e-05, |
|
"loss": 0.6028, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7619366055904777e-05, |
|
"loss": 0.6556, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7592617359903704e-05, |
|
"loss": 0.625, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7565868663902635e-05, |
|
"loss": 0.6198, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7539119967901565e-05, |
|
"loss": 0.6538, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7512371271900496e-05, |
|
"loss": 0.6265, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7485622575899426e-05, |
|
"loss": 0.6375, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7458873879898357e-05, |
|
"loss": 0.6325, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7432125183897287e-05, |
|
"loss": 0.6258, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7405376487896218e-05, |
|
"loss": 0.6235, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7378627791895145e-05, |
|
"loss": 0.6386, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.735187909589408e-05, |
|
"loss": 0.6243, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7325130399893006e-05, |
|
"loss": 0.6471, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7298381703891936e-05, |
|
"loss": 0.6396, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7271633007890867e-05, |
|
"loss": 0.6164, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7244884311889797e-05, |
|
"loss": 0.5981, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7218135615888728e-05, |
|
"loss": 0.6505, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7191386919887655e-05, |
|
"loss": 0.651, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.716463822388659e-05, |
|
"loss": 0.629, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7137889527885516e-05, |
|
"loss": 0.6393, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7111140831884446e-05, |
|
"loss": 0.6168, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7084392135883377e-05, |
|
"loss": 0.6253, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7057643439882307e-05, |
|
"loss": 0.625, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7030894743881238e-05, |
|
"loss": 0.6131, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7004146047880168e-05, |
|
"loss": 0.6018, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6977397351879095e-05, |
|
"loss": 0.6389, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.695064865587803e-05, |
|
"loss": 0.6068, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6923899959876956e-05, |
|
"loss": 0.6238, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6897151263875887e-05, |
|
"loss": 0.6017, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6870402567874817e-05, |
|
"loss": 0.6052, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6843653871873748e-05, |
|
"loss": 0.6021, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6816905175872678e-05, |
|
"loss": 0.6168, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6790156479871605e-05, |
|
"loss": 0.6086, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.676340778387054e-05, |
|
"loss": 0.5905, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6736659087869466e-05, |
|
"loss": 0.6268, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6709910391868397e-05, |
|
"loss": 0.626, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6683161695867327e-05, |
|
"loss": 0.631, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6656412999866258e-05, |
|
"loss": 0.5936, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6629664303865188e-05, |
|
"loss": 0.6137, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.660291560786412e-05, |
|
"loss": 0.6185, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6576166911863046e-05, |
|
"loss": 0.6009, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.654941821586198e-05, |
|
"loss": 0.6038, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6522669519860907e-05, |
|
"loss": 0.6081, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6495920823859837e-05, |
|
"loss": 0.6032, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6469172127858768e-05, |
|
"loss": 0.6166, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6442423431857698e-05, |
|
"loss": 0.6125, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.641567473585663e-05, |
|
"loss": 0.5948, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.638892603985556e-05, |
|
"loss": 0.5957, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.636217734385449e-05, |
|
"loss": 0.5965, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6335428647853417e-05, |
|
"loss": 0.5997, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6308679951852347e-05, |
|
"loss": 0.5736, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6281931255851278e-05, |
|
"loss": 0.6116, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6255182559850208e-05, |
|
"loss": 0.6081, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.622843386384914e-05, |
|
"loss": 0.5891, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.620168516784807e-05, |
|
"loss": 0.5942, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6174936471847e-05, |
|
"loss": 0.6041, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.614818777584593e-05, |
|
"loss": 0.5901, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6121439079844857e-05, |
|
"loss": 0.5926, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.609469038384379e-05, |
|
"loss": 0.6103, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6067941687842718e-05, |
|
"loss": 0.6269, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.604119299184165e-05, |
|
"loss": 0.5939, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.601444429584058e-05, |
|
"loss": 0.6014, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.598769559983951e-05, |
|
"loss": 0.5945, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.596094690383844e-05, |
|
"loss": 0.6079, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.593419820783737e-05, |
|
"loss": 0.5995, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5907449511836298e-05, |
|
"loss": 0.5761, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5880700815835228e-05, |
|
"loss": 0.6073, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.585395211983416e-05, |
|
"loss": 0.6005, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.582720342383309e-05, |
|
"loss": 0.6059, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.580045472783202e-05, |
|
"loss": 0.5678, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.577370603183095e-05, |
|
"loss": 0.6119, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.574695733582988e-05, |
|
"loss": 0.5892, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5720208639828808e-05, |
|
"loss": 0.5889, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.569345994382774e-05, |
|
"loss": 0.5922, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.566671124782667e-05, |
|
"loss": 0.582, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.56399625518256e-05, |
|
"loss": 0.5695, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.561321385582453e-05, |
|
"loss": 0.5804, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.558646515982346e-05, |
|
"loss": 0.5809, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.555971646382239e-05, |
|
"loss": 0.5828, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.553296776782132e-05, |
|
"loss": 0.5754, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5506219071820248e-05, |
|
"loss": 0.5772, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5479470375819182e-05, |
|
"loss": 0.5668, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.545272167981811e-05, |
|
"loss": 0.5886, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.542597298381704e-05, |
|
"loss": 0.5698, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.539922428781597e-05, |
|
"loss": 0.5891, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.53724755918149e-05, |
|
"loss": 0.5818, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.534572689581383e-05, |
|
"loss": 0.58, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5318978199812758e-05, |
|
"loss": 0.5969, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5292229503811692e-05, |
|
"loss": 0.579, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.526548080781062e-05, |
|
"loss": 0.5803, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5238732111809551e-05, |
|
"loss": 0.5756, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.521198341580848e-05, |
|
"loss": 0.5794, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.518523471980741e-05, |
|
"loss": 0.5718, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5158486023806341e-05, |
|
"loss": 0.5849, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5131737327805272e-05, |
|
"loss": 0.5704, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.51049886318042e-05, |
|
"loss": 0.5817, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5078239935803133e-05, |
|
"loss": 0.5716, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5051491239802061e-05, |
|
"loss": 0.5957, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5024742543800992e-05, |
|
"loss": 0.5686, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.499799384779992e-05, |
|
"loss": 0.5764, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.497124515179885e-05, |
|
"loss": 0.5837, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4944496455797782e-05, |
|
"loss": 0.5668, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.491774775979671e-05, |
|
"loss": 0.579, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.489099906379564e-05, |
|
"loss": 0.5738, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.486425036779457e-05, |
|
"loss": 0.5675, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4837501671793502e-05, |
|
"loss": 0.5844, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.481075297579243e-05, |
|
"loss": 0.5907, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4784004279791361e-05, |
|
"loss": 0.5837, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4757255583790292e-05, |
|
"loss": 0.5634, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4730506887789222e-05, |
|
"loss": 0.5743, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.470375819178815e-05, |
|
"loss": 0.5821, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4677009495787083e-05, |
|
"loss": 0.5779, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4650260799786012e-05, |
|
"loss": 0.569, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4623512103784942e-05, |
|
"loss": 0.5907, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4596763407783871e-05, |
|
"loss": 0.5851, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4570014711782803e-05, |
|
"loss": 0.5933, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4543266015781732e-05, |
|
"loss": 0.5823, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.451651731978066e-05, |
|
"loss": 0.5701, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4489768623779591e-05, |
|
"loss": 0.5872, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4463019927778522e-05, |
|
"loss": 0.5675, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4436271231777452e-05, |
|
"loss": 0.5736, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4409522535776381e-05, |
|
"loss": 0.5574, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4382773839775313e-05, |
|
"loss": 0.5631, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4356025143774242e-05, |
|
"loss": 0.5662, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4329276447773172e-05, |
|
"loss": 0.5702, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4302527751772101e-05, |
|
"loss": 0.5507, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4275779055771033e-05, |
|
"loss": 0.5718, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4249030359769962e-05, |
|
"loss": 0.5545, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4222281663768893e-05, |
|
"loss": 0.5699, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4195532967767822e-05, |
|
"loss": 0.5718, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4168784271766754e-05, |
|
"loss": 0.585, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4142035575765682e-05, |
|
"loss": 0.5429, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4115286879764613e-05, |
|
"loss": 0.5741, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4088538183763543e-05, |
|
"loss": 0.569, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4061789487762472e-05, |
|
"loss": 0.5665, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4035040791761403e-05, |
|
"loss": 0.5481, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4008292095760332e-05, |
|
"loss": 0.571, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3981543399759264e-05, |
|
"loss": 0.5899, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3954794703758192e-05, |
|
"loss": 0.5628, |
|
"step": 113000 |
|
} |
|
], |
|
"max_steps": 373850, |
|
"num_train_epochs": 1, |
|
"total_flos": 4.507721263761245e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|