tst-summarization / trainer_state.json
sudoLife's picture
End of training
ccd0acc
raw
history blame contribute delete
No virus
26.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 107670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.976780904615956e-05,
"loss": 1.9554,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 4.953561809231913e-05,
"loss": 1.9046,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 4.9303427138478686e-05,
"loss": 1.9126,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 4.9071236184638245e-05,
"loss": 1.8888,
"step": 2000
},
{
"epoch": 0.07,
"learning_rate": 4.883904523079781e-05,
"loss": 1.8961,
"step": 2500
},
{
"epoch": 0.08,
"learning_rate": 4.860685427695737e-05,
"loss": 1.8829,
"step": 3000
},
{
"epoch": 0.1,
"learning_rate": 4.837466332311693e-05,
"loss": 1.8841,
"step": 3500
},
{
"epoch": 0.11,
"learning_rate": 4.81424723692765e-05,
"loss": 1.8742,
"step": 4000
},
{
"epoch": 0.13,
"learning_rate": 4.791028141543606e-05,
"loss": 1.8831,
"step": 4500
},
{
"epoch": 0.14,
"learning_rate": 4.767809046159562e-05,
"loss": 1.8814,
"step": 5000
},
{
"epoch": 0.15,
"learning_rate": 4.7445899507755184e-05,
"loss": 1.8688,
"step": 5500
},
{
"epoch": 0.17,
"learning_rate": 4.721370855391474e-05,
"loss": 1.8595,
"step": 6000
},
{
"epoch": 0.18,
"learning_rate": 4.69815176000743e-05,
"loss": 1.8524,
"step": 6500
},
{
"epoch": 0.2,
"learning_rate": 4.674932664623387e-05,
"loss": 1.8648,
"step": 7000
},
{
"epoch": 0.21,
"learning_rate": 4.6517135692393426e-05,
"loss": 1.8587,
"step": 7500
},
{
"epoch": 0.22,
"learning_rate": 4.6284944738552985e-05,
"loss": 1.8574,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 4.605275378471255e-05,
"loss": 1.8662,
"step": 8500
},
{
"epoch": 0.25,
"learning_rate": 4.582056283087211e-05,
"loss": 1.8661,
"step": 9000
},
{
"epoch": 0.26,
"learning_rate": 4.558837187703167e-05,
"loss": 1.831,
"step": 9500
},
{
"epoch": 0.28,
"learning_rate": 4.5356180923191235e-05,
"loss": 1.8423,
"step": 10000
},
{
"epoch": 0.29,
"learning_rate": 4.5123989969350793e-05,
"loss": 1.8448,
"step": 10500
},
{
"epoch": 0.31,
"learning_rate": 4.489179901551035e-05,
"loss": 1.8567,
"step": 11000
},
{
"epoch": 0.32,
"learning_rate": 4.4659608061669925e-05,
"loss": 1.8425,
"step": 11500
},
{
"epoch": 0.33,
"learning_rate": 4.4427417107829484e-05,
"loss": 1.8516,
"step": 12000
},
{
"epoch": 0.35,
"learning_rate": 4.419522615398904e-05,
"loss": 1.8333,
"step": 12500
},
{
"epoch": 0.36,
"learning_rate": 4.396303520014861e-05,
"loss": 1.8588,
"step": 13000
},
{
"epoch": 0.38,
"learning_rate": 4.373084424630817e-05,
"loss": 1.8571,
"step": 13500
},
{
"epoch": 0.39,
"learning_rate": 4.3498653292467726e-05,
"loss": 1.8509,
"step": 14000
},
{
"epoch": 0.4,
"learning_rate": 4.326646233862729e-05,
"loss": 1.8604,
"step": 14500
},
{
"epoch": 0.42,
"learning_rate": 4.303427138478685e-05,
"loss": 1.8501,
"step": 15000
},
{
"epoch": 0.43,
"learning_rate": 4.280208043094641e-05,
"loss": 1.8367,
"step": 15500
},
{
"epoch": 0.45,
"learning_rate": 4.2569889477105975e-05,
"loss": 1.8487,
"step": 16000
},
{
"epoch": 0.46,
"learning_rate": 4.2337698523265534e-05,
"loss": 1.8472,
"step": 16500
},
{
"epoch": 0.47,
"learning_rate": 4.210550756942509e-05,
"loss": 1.8576,
"step": 17000
},
{
"epoch": 0.49,
"learning_rate": 4.187331661558466e-05,
"loss": 1.8554,
"step": 17500
},
{
"epoch": 0.5,
"learning_rate": 4.164112566174422e-05,
"loss": 1.8348,
"step": 18000
},
{
"epoch": 0.52,
"learning_rate": 4.140893470790378e-05,
"loss": 1.8368,
"step": 18500
},
{
"epoch": 0.53,
"learning_rate": 4.117674375406335e-05,
"loss": 1.8508,
"step": 19000
},
{
"epoch": 0.54,
"learning_rate": 4.094455280022291e-05,
"loss": 1.8349,
"step": 19500
},
{
"epoch": 0.56,
"learning_rate": 4.071236184638247e-05,
"loss": 1.8553,
"step": 20000
},
{
"epoch": 0.57,
"learning_rate": 4.048017089254203e-05,
"loss": 1.838,
"step": 20500
},
{
"epoch": 0.59,
"learning_rate": 4.024797993870159e-05,
"loss": 1.8591,
"step": 21000
},
{
"epoch": 0.6,
"learning_rate": 4.001578898486115e-05,
"loss": 1.8444,
"step": 21500
},
{
"epoch": 0.61,
"learning_rate": 3.9783598031020716e-05,
"loss": 1.8336,
"step": 22000
},
{
"epoch": 0.63,
"learning_rate": 3.9551407077180275e-05,
"loss": 1.8404,
"step": 22500
},
{
"epoch": 0.64,
"learning_rate": 3.9319216123339834e-05,
"loss": 1.8406,
"step": 23000
},
{
"epoch": 0.65,
"learning_rate": 3.90870251694994e-05,
"loss": 1.8488,
"step": 23500
},
{
"epoch": 0.67,
"learning_rate": 3.885483421565896e-05,
"loss": 1.8399,
"step": 24000
},
{
"epoch": 0.68,
"learning_rate": 3.862264326181852e-05,
"loss": 1.8331,
"step": 24500
},
{
"epoch": 0.7,
"learning_rate": 3.839045230797808e-05,
"loss": 1.8437,
"step": 25000
},
{
"epoch": 0.71,
"learning_rate": 3.815826135413764e-05,
"loss": 1.8311,
"step": 25500
},
{
"epoch": 0.72,
"learning_rate": 3.792607040029721e-05,
"loss": 1.8397,
"step": 26000
},
{
"epoch": 0.74,
"learning_rate": 3.769387944645677e-05,
"loss": 1.8535,
"step": 26500
},
{
"epoch": 0.75,
"learning_rate": 3.746168849261633e-05,
"loss": 1.8485,
"step": 27000
},
{
"epoch": 0.77,
"learning_rate": 3.722949753877589e-05,
"loss": 1.8113,
"step": 27500
},
{
"epoch": 0.78,
"learning_rate": 3.6997306584935456e-05,
"loss": 1.8148,
"step": 28000
},
{
"epoch": 0.79,
"learning_rate": 3.6765115631095015e-05,
"loss": 1.8111,
"step": 28500
},
{
"epoch": 0.81,
"learning_rate": 3.6532924677254574e-05,
"loss": 1.822,
"step": 29000
},
{
"epoch": 0.82,
"learning_rate": 3.630073372341414e-05,
"loss": 1.8385,
"step": 29500
},
{
"epoch": 0.84,
"learning_rate": 3.60685427695737e-05,
"loss": 1.8217,
"step": 30000
},
{
"epoch": 0.85,
"learning_rate": 3.583635181573326e-05,
"loss": 1.8279,
"step": 30500
},
{
"epoch": 0.86,
"learning_rate": 3.560416086189282e-05,
"loss": 1.8443,
"step": 31000
},
{
"epoch": 0.88,
"learning_rate": 3.537196990805238e-05,
"loss": 1.848,
"step": 31500
},
{
"epoch": 0.89,
"learning_rate": 3.513977895421194e-05,
"loss": 1.8408,
"step": 32000
},
{
"epoch": 0.91,
"learning_rate": 3.490758800037151e-05,
"loss": 1.8216,
"step": 32500
},
{
"epoch": 0.92,
"learning_rate": 3.4675397046531066e-05,
"loss": 1.8322,
"step": 33000
},
{
"epoch": 0.93,
"learning_rate": 3.444320609269063e-05,
"loss": 1.8282,
"step": 33500
},
{
"epoch": 0.95,
"learning_rate": 3.42110151388502e-05,
"loss": 1.8157,
"step": 34000
},
{
"epoch": 0.96,
"learning_rate": 3.3978824185009756e-05,
"loss": 1.8295,
"step": 34500
},
{
"epoch": 0.98,
"learning_rate": 3.3746633231169315e-05,
"loss": 1.82,
"step": 35000
},
{
"epoch": 0.99,
"learning_rate": 3.351444227732888e-05,
"loss": 1.8348,
"step": 35500
},
{
"epoch": 1.0,
"learning_rate": 3.328225132348844e-05,
"loss": 1.8431,
"step": 36000
},
{
"epoch": 1.02,
"learning_rate": 3.3050060369648e-05,
"loss": 1.8095,
"step": 36500
},
{
"epoch": 1.03,
"learning_rate": 3.2817869415807564e-05,
"loss": 1.8152,
"step": 37000
},
{
"epoch": 1.04,
"learning_rate": 3.258567846196712e-05,
"loss": 1.8002,
"step": 37500
},
{
"epoch": 1.06,
"learning_rate": 3.235348750812668e-05,
"loss": 1.8187,
"step": 38000
},
{
"epoch": 1.07,
"learning_rate": 3.212129655428625e-05,
"loss": 1.8142,
"step": 38500
},
{
"epoch": 1.09,
"learning_rate": 3.1889105600445806e-05,
"loss": 1.8112,
"step": 39000
},
{
"epoch": 1.1,
"learning_rate": 3.1656914646605365e-05,
"loss": 1.7863,
"step": 39500
},
{
"epoch": 1.11,
"learning_rate": 3.142472369276493e-05,
"loss": 1.8133,
"step": 40000
},
{
"epoch": 1.13,
"learning_rate": 3.119253273892449e-05,
"loss": 1.8012,
"step": 40500
},
{
"epoch": 1.14,
"learning_rate": 3.0960341785084055e-05,
"loss": 1.812,
"step": 41000
},
{
"epoch": 1.16,
"learning_rate": 3.072815083124362e-05,
"loss": 1.8144,
"step": 41500
},
{
"epoch": 1.17,
"learning_rate": 3.049595987740318e-05,
"loss": 1.7835,
"step": 42000
},
{
"epoch": 1.18,
"learning_rate": 3.0263768923562742e-05,
"loss": 1.826,
"step": 42500
},
{
"epoch": 1.2,
"learning_rate": 3.00315779697223e-05,
"loss": 1.8227,
"step": 43000
},
{
"epoch": 1.21,
"learning_rate": 2.9799387015881863e-05,
"loss": 1.7956,
"step": 43500
},
{
"epoch": 1.23,
"learning_rate": 2.9567196062041426e-05,
"loss": 1.8176,
"step": 44000
},
{
"epoch": 1.24,
"learning_rate": 2.9335005108200985e-05,
"loss": 1.8231,
"step": 44500
},
{
"epoch": 1.25,
"learning_rate": 2.9102814154360547e-05,
"loss": 1.7996,
"step": 45000
},
{
"epoch": 1.27,
"learning_rate": 2.887062320052011e-05,
"loss": 1.8013,
"step": 45500
},
{
"epoch": 1.28,
"learning_rate": 2.8638432246679668e-05,
"loss": 1.794,
"step": 46000
},
{
"epoch": 1.3,
"learning_rate": 2.840624129283923e-05,
"loss": 1.8123,
"step": 46500
},
{
"epoch": 1.31,
"learning_rate": 2.8174050338998793e-05,
"loss": 1.8006,
"step": 47000
},
{
"epoch": 1.32,
"learning_rate": 2.794185938515835e-05,
"loss": 1.8032,
"step": 47500
},
{
"epoch": 1.34,
"learning_rate": 2.7709668431317914e-05,
"loss": 1.8006,
"step": 48000
},
{
"epoch": 1.35,
"learning_rate": 2.7477477477477483e-05,
"loss": 1.8017,
"step": 48500
},
{
"epoch": 1.37,
"learning_rate": 2.7245286523637042e-05,
"loss": 1.8057,
"step": 49000
},
{
"epoch": 1.38,
"learning_rate": 2.7013095569796604e-05,
"loss": 1.783,
"step": 49500
},
{
"epoch": 1.39,
"learning_rate": 2.6780904615956166e-05,
"loss": 1.8021,
"step": 50000
},
{
"epoch": 1.41,
"learning_rate": 2.6548713662115725e-05,
"loss": 1.7841,
"step": 50500
},
{
"epoch": 1.42,
"learning_rate": 2.6316522708275288e-05,
"loss": 1.8369,
"step": 51000
},
{
"epoch": 1.43,
"learning_rate": 2.608433175443485e-05,
"loss": 1.8124,
"step": 51500
},
{
"epoch": 1.45,
"learning_rate": 2.585214080059441e-05,
"loss": 1.7967,
"step": 52000
},
{
"epoch": 1.46,
"learning_rate": 2.561994984675397e-05,
"loss": 1.8099,
"step": 52500
},
{
"epoch": 1.48,
"learning_rate": 2.5387758892913533e-05,
"loss": 1.8068,
"step": 53000
},
{
"epoch": 1.49,
"learning_rate": 2.5155567939073092e-05,
"loss": 1.81,
"step": 53500
},
{
"epoch": 1.5,
"learning_rate": 2.4923376985232658e-05,
"loss": 1.8087,
"step": 54000
},
{
"epoch": 1.52,
"learning_rate": 2.469118603139222e-05,
"loss": 1.8065,
"step": 54500
},
{
"epoch": 1.53,
"learning_rate": 2.445899507755178e-05,
"loss": 1.7961,
"step": 55000
},
{
"epoch": 1.55,
"learning_rate": 2.422680412371134e-05,
"loss": 1.7958,
"step": 55500
},
{
"epoch": 1.56,
"learning_rate": 2.3994613169870904e-05,
"loss": 1.7864,
"step": 56000
},
{
"epoch": 1.57,
"learning_rate": 2.3762422216030463e-05,
"loss": 1.8008,
"step": 56500
},
{
"epoch": 1.59,
"learning_rate": 2.3530231262190025e-05,
"loss": 1.7768,
"step": 57000
},
{
"epoch": 1.6,
"learning_rate": 2.329804030834959e-05,
"loss": 1.8006,
"step": 57500
},
{
"epoch": 1.62,
"learning_rate": 2.306584935450915e-05,
"loss": 1.7848,
"step": 58000
},
{
"epoch": 1.63,
"learning_rate": 2.283365840066871e-05,
"loss": 1.8123,
"step": 58500
},
{
"epoch": 1.64,
"learning_rate": 2.2601467446828274e-05,
"loss": 1.7992,
"step": 59000
},
{
"epoch": 1.66,
"learning_rate": 2.2369276492987833e-05,
"loss": 1.8063,
"step": 59500
},
{
"epoch": 1.67,
"learning_rate": 2.2137085539147395e-05,
"loss": 1.8029,
"step": 60000
},
{
"epoch": 1.69,
"learning_rate": 2.1904894585306957e-05,
"loss": 1.8058,
"step": 60500
},
{
"epoch": 1.7,
"learning_rate": 2.1672703631466516e-05,
"loss": 1.7953,
"step": 61000
},
{
"epoch": 1.71,
"learning_rate": 2.1440512677626082e-05,
"loss": 1.7968,
"step": 61500
},
{
"epoch": 1.73,
"learning_rate": 2.1208321723785644e-05,
"loss": 1.797,
"step": 62000
},
{
"epoch": 1.74,
"learning_rate": 2.0976130769945203e-05,
"loss": 1.8062,
"step": 62500
},
{
"epoch": 1.76,
"learning_rate": 2.0743939816104765e-05,
"loss": 1.7876,
"step": 63000
},
{
"epoch": 1.77,
"learning_rate": 2.0511748862264328e-05,
"loss": 1.817,
"step": 63500
},
{
"epoch": 1.78,
"learning_rate": 2.0279557908423887e-05,
"loss": 1.8015,
"step": 64000
},
{
"epoch": 1.8,
"learning_rate": 2.004736695458345e-05,
"loss": 1.8002,
"step": 64500
},
{
"epoch": 1.81,
"learning_rate": 1.9815176000743015e-05,
"loss": 1.8066,
"step": 65000
},
{
"epoch": 1.83,
"learning_rate": 1.9582985046902573e-05,
"loss": 1.814,
"step": 65500
},
{
"epoch": 1.84,
"learning_rate": 1.9350794093062136e-05,
"loss": 1.7941,
"step": 66000
},
{
"epoch": 1.85,
"learning_rate": 1.9118603139221698e-05,
"loss": 1.7946,
"step": 66500
},
{
"epoch": 1.87,
"learning_rate": 1.8886412185381257e-05,
"loss": 1.7813,
"step": 67000
},
{
"epoch": 1.88,
"learning_rate": 1.865422123154082e-05,
"loss": 1.7869,
"step": 67500
},
{
"epoch": 1.89,
"learning_rate": 1.842203027770038e-05,
"loss": 1.8165,
"step": 68000
},
{
"epoch": 1.91,
"learning_rate": 1.8189839323859944e-05,
"loss": 1.7859,
"step": 68500
},
{
"epoch": 1.92,
"learning_rate": 1.7957648370019506e-05,
"loss": 1.785,
"step": 69000
},
{
"epoch": 1.94,
"learning_rate": 1.772545741617907e-05,
"loss": 1.7911,
"step": 69500
},
{
"epoch": 1.95,
"learning_rate": 1.7493266462338627e-05,
"loss": 1.8297,
"step": 70000
},
{
"epoch": 1.96,
"learning_rate": 1.726107550849819e-05,
"loss": 1.8001,
"step": 70500
},
{
"epoch": 1.98,
"learning_rate": 1.7028884554657752e-05,
"loss": 1.7867,
"step": 71000
},
{
"epoch": 1.99,
"learning_rate": 1.679669360081731e-05,
"loss": 1.7958,
"step": 71500
},
{
"epoch": 2.01,
"learning_rate": 1.6564502646976873e-05,
"loss": 1.7747,
"step": 72000
},
{
"epoch": 2.02,
"learning_rate": 1.633231169313644e-05,
"loss": 1.786,
"step": 72500
},
{
"epoch": 2.03,
"learning_rate": 1.6100120739295998e-05,
"loss": 1.7847,
"step": 73000
},
{
"epoch": 2.05,
"learning_rate": 1.586792978545556e-05,
"loss": 1.78,
"step": 73500
},
{
"epoch": 2.06,
"learning_rate": 1.5635738831615122e-05,
"loss": 1.774,
"step": 74000
},
{
"epoch": 2.08,
"learning_rate": 1.540354787777468e-05,
"loss": 1.804,
"step": 74500
},
{
"epoch": 2.09,
"learning_rate": 1.5171356923934243e-05,
"loss": 1.7776,
"step": 75000
},
{
"epoch": 2.1,
"learning_rate": 1.4939165970093804e-05,
"loss": 1.8007,
"step": 75500
},
{
"epoch": 2.12,
"learning_rate": 1.470697501625337e-05,
"loss": 1.7747,
"step": 76000
},
{
"epoch": 2.13,
"learning_rate": 1.447478406241293e-05,
"loss": 1.7932,
"step": 76500
},
{
"epoch": 2.15,
"learning_rate": 1.424259310857249e-05,
"loss": 1.7718,
"step": 77000
},
{
"epoch": 2.16,
"learning_rate": 1.4010402154732053e-05,
"loss": 1.7861,
"step": 77500
},
{
"epoch": 2.17,
"learning_rate": 1.3778211200891614e-05,
"loss": 1.783,
"step": 78000
},
{
"epoch": 2.19,
"learning_rate": 1.3546020247051174e-05,
"loss": 1.7914,
"step": 78500
},
{
"epoch": 2.2,
"learning_rate": 1.3313829293210736e-05,
"loss": 1.7831,
"step": 79000
},
{
"epoch": 2.22,
"learning_rate": 1.30816383393703e-05,
"loss": 1.7912,
"step": 79500
},
{
"epoch": 2.23,
"learning_rate": 1.2849447385529861e-05,
"loss": 1.7925,
"step": 80000
},
{
"epoch": 2.24,
"learning_rate": 1.2617256431689423e-05,
"loss": 1.7918,
"step": 80500
},
{
"epoch": 2.26,
"learning_rate": 1.2385065477848984e-05,
"loss": 1.7757,
"step": 81000
},
{
"epoch": 2.27,
"learning_rate": 1.2152874524008545e-05,
"loss": 1.7817,
"step": 81500
},
{
"epoch": 2.28,
"learning_rate": 1.1920683570168107e-05,
"loss": 1.7793,
"step": 82000
},
{
"epoch": 2.3,
"learning_rate": 1.1688492616327669e-05,
"loss": 1.7921,
"step": 82500
},
{
"epoch": 2.31,
"learning_rate": 1.145630166248723e-05,
"loss": 1.7963,
"step": 83000
},
{
"epoch": 2.33,
"learning_rate": 1.1224110708646792e-05,
"loss": 1.7736,
"step": 83500
},
{
"epoch": 2.34,
"learning_rate": 1.0991919754806353e-05,
"loss": 1.7757,
"step": 84000
},
{
"epoch": 2.35,
"learning_rate": 1.0759728800965915e-05,
"loss": 1.7496,
"step": 84500
},
{
"epoch": 2.37,
"learning_rate": 1.0527537847125477e-05,
"loss": 1.8009,
"step": 85000
},
{
"epoch": 2.38,
"learning_rate": 1.0295346893285038e-05,
"loss": 1.781,
"step": 85500
},
{
"epoch": 2.4,
"learning_rate": 1.00631559394446e-05,
"loss": 1.794,
"step": 86000
},
{
"epoch": 2.41,
"learning_rate": 9.830964985604162e-06,
"loss": 1.7692,
"step": 86500
},
{
"epoch": 2.42,
"learning_rate": 9.598774031763723e-06,
"loss": 1.7747,
"step": 87000
},
{
"epoch": 2.44,
"learning_rate": 9.366583077923283e-06,
"loss": 1.7775,
"step": 87500
},
{
"epoch": 2.45,
"learning_rate": 9.134392124082847e-06,
"loss": 1.7649,
"step": 88000
},
{
"epoch": 2.47,
"learning_rate": 8.902201170242408e-06,
"loss": 1.7753,
"step": 88500
},
{
"epoch": 2.48,
"learning_rate": 8.670010216401969e-06,
"loss": 1.771,
"step": 89000
},
{
"epoch": 2.49,
"learning_rate": 8.437819262561531e-06,
"loss": 1.7666,
"step": 89500
},
{
"epoch": 2.51,
"learning_rate": 8.205628308721093e-06,
"loss": 1.7985,
"step": 90000
},
{
"epoch": 2.52,
"learning_rate": 7.973437354880654e-06,
"loss": 1.7865,
"step": 90500
},
{
"epoch": 2.54,
"learning_rate": 7.741246401040216e-06,
"loss": 1.7887,
"step": 91000
},
{
"epoch": 2.55,
"learning_rate": 7.509055447199777e-06,
"loss": 1.7825,
"step": 91500
},
{
"epoch": 2.56,
"learning_rate": 7.27686449335934e-06,
"loss": 1.7903,
"step": 92000
},
{
"epoch": 2.58,
"learning_rate": 7.0446735395189e-06,
"loss": 1.7918,
"step": 92500
},
{
"epoch": 2.59,
"learning_rate": 6.812482585678462e-06,
"loss": 1.7839,
"step": 93000
},
{
"epoch": 2.61,
"learning_rate": 6.580291631838025e-06,
"loss": 1.768,
"step": 93500
},
{
"epoch": 2.62,
"learning_rate": 6.3481006779975855e-06,
"loss": 1.7795,
"step": 94000
},
{
"epoch": 2.63,
"learning_rate": 6.115909724157147e-06,
"loss": 1.7708,
"step": 94500
},
{
"epoch": 2.65,
"learning_rate": 5.883718770316709e-06,
"loss": 1.7872,
"step": 95000
},
{
"epoch": 2.66,
"learning_rate": 5.65152781647627e-06,
"loss": 1.7933,
"step": 95500
},
{
"epoch": 2.67,
"learning_rate": 5.419336862635832e-06,
"loss": 1.7701,
"step": 96000
},
{
"epoch": 2.69,
"learning_rate": 5.1871459087953935e-06,
"loss": 1.7807,
"step": 96500
},
{
"epoch": 2.7,
"learning_rate": 4.954954954954955e-06,
"loss": 1.7973,
"step": 97000
},
{
"epoch": 2.72,
"learning_rate": 4.722764001114516e-06,
"loss": 1.8021,
"step": 97500
},
{
"epoch": 2.73,
"learning_rate": 4.490573047274079e-06,
"loss": 1.8002,
"step": 98000
},
{
"epoch": 2.74,
"learning_rate": 4.25838209343364e-06,
"loss": 1.7783,
"step": 98500
},
{
"epoch": 2.76,
"learning_rate": 4.0261911395932016e-06,
"loss": 1.784,
"step": 99000
},
{
"epoch": 2.77,
"learning_rate": 3.7940001857527634e-06,
"loss": 1.7755,
"step": 99500
},
{
"epoch": 2.79,
"learning_rate": 3.561809231912325e-06,
"loss": 1.7845,
"step": 100000
},
{
"epoch": 2.8,
"learning_rate": 3.3296182780718867e-06,
"loss": 1.791,
"step": 100500
},
{
"epoch": 2.81,
"learning_rate": 3.097427324231448e-06,
"loss": 1.7703,
"step": 101000
},
{
"epoch": 2.83,
"learning_rate": 2.86523637039101e-06,
"loss": 1.7896,
"step": 101500
},
{
"epoch": 2.84,
"learning_rate": 2.6330454165505714e-06,
"loss": 1.7663,
"step": 102000
},
{
"epoch": 2.86,
"learning_rate": 2.400854462710133e-06,
"loss": 1.7777,
"step": 102500
},
{
"epoch": 2.87,
"learning_rate": 2.1686635088696947e-06,
"loss": 1.7817,
"step": 103000
},
{
"epoch": 2.88,
"learning_rate": 1.936472555029256e-06,
"loss": 1.7943,
"step": 103500
},
{
"epoch": 2.9,
"learning_rate": 1.7042816011888178e-06,
"loss": 1.7932,
"step": 104000
},
{
"epoch": 2.91,
"learning_rate": 1.4720906473483793e-06,
"loss": 1.7895,
"step": 104500
},
{
"epoch": 2.93,
"learning_rate": 1.239899693507941e-06,
"loss": 1.772,
"step": 105000
},
{
"epoch": 2.94,
"learning_rate": 1.0077087396675028e-06,
"loss": 1.7622,
"step": 105500
},
{
"epoch": 2.95,
"learning_rate": 7.755177858270642e-07,
"loss": 1.7828,
"step": 106000
},
{
"epoch": 2.97,
"learning_rate": 5.433268319866259e-07,
"loss": 1.7889,
"step": 106500
},
{
"epoch": 2.98,
"learning_rate": 3.1113587814618745e-07,
"loss": 1.7694,
"step": 107000
},
{
"epoch": 3.0,
"learning_rate": 7.894492430574905e-08,
"loss": 1.7807,
"step": 107500
},
{
"epoch": 3.0,
"step": 107670,
"total_flos": 2.32802467336618e+17,
"train_loss": 1.6543810425201497,
"train_runtime": 42620.4478,
"train_samples_per_second": 20.21,
"train_steps_per_second": 2.526
}
],
"max_steps": 107670,
"num_train_epochs": 3,
"total_flos": 2.32802467336618e+17,
"trial_name": null,
"trial_params": null
}