{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 125600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4,
      "learning_rate": 2.988296178343949e-05,
      "loss": 7.6975,
      "step": 500
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.9763535031847134e-05,
      "loss": 4.413,
      "step": 1000
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.964410828025478e-05,
      "loss": 3.5707,
      "step": 1500
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.952468152866242e-05,
      "loss": 3.4966,
      "step": 2000
    },
    {
      "epoch": 1.99,
      "learning_rate": 2.9405254777070064e-05,
      "loss": 3.7477,
      "step": 2500
    },
    {
      "epoch": 2.39,
      "learning_rate": 2.9285828025477707e-05,
      "loss": 3.4618,
      "step": 3000
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.916640127388535e-05,
      "loss": 3.4774,
      "step": 3500
    },
    {
      "epoch": 3.18,
      "learning_rate": 2.9046974522292994e-05,
      "loss": 3.4214,
      "step": 4000
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.8927786624203823e-05,
      "loss": 2.5773,
      "step": 4500
    },
    {
      "epoch": 3.98,
      "learning_rate": 2.8808359872611467e-05,
      "loss": 3.2549,
      "step": 5000
    },
    {
      "epoch": 4.38,
      "learning_rate": 2.868893312101911e-05,
      "loss": 2.4922,
      "step": 5500
    },
    {
      "epoch": 4.78,
      "learning_rate": 2.856950636942675e-05,
      "loss": 2.6262,
      "step": 6000
    },
    {
      "epoch": 5.18,
      "learning_rate": 2.845031847133758e-05,
      "loss": 2.5596,
      "step": 6500
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.8331130573248408e-05,
      "loss": 2.7913,
      "step": 7000
    },
    {
      "epoch": 5.97,
      "learning_rate": 2.821170382165605e-05,
      "loss": 3.0112,
      "step": 7500
    },
    {
      "epoch": 6.37,
      "learning_rate": 2.8092277070063698e-05,
      "loss": 2.6159,
      "step": 8000
    },
    {
      "epoch": 6.77,
      "learning_rate": 2.7972850318471338e-05,
      "loss": 3.9984,
      "step": 8500
    },
    {
      "epoch": 7.17,
      "learning_rate": 2.785342356687898e-05,
      "loss": 2.6099,
      "step": 9000
    },
    {
      "epoch": 7.56,
      "learning_rate": 2.7733996815286625e-05,
      "loss": 2.5667,
      "step": 9500
    },
    {
      "epoch": 7.96,
      "learning_rate": 2.7614570063694268e-05,
      "loss": 2.5934,
      "step": 10000
    },
    {
      "epoch": 8.36,
      "learning_rate": 2.7495382165605094e-05,
      "loss": 2.7481,
      "step": 10500
    },
    {
      "epoch": 8.76,
      "learning_rate": 2.737595541401274e-05,
      "loss": 3.0303,
      "step": 11000
    },
    {
      "epoch": 9.16,
      "learning_rate": 2.7256528662420384e-05,
      "loss": 2.8187,
      "step": 11500
    },
    {
      "epoch": 9.55,
      "learning_rate": 2.7137101910828027e-05,
      "loss": 2.8765,
      "step": 12000
    },
    {
      "epoch": 9.95,
      "learning_rate": 2.7017914012738853e-05,
      "loss": 2.8891,
      "step": 12500
    },
    {
      "epoch": 10.35,
      "learning_rate": 2.6898487261146496e-05,
      "loss": 2.6341,
      "step": 13000
    },
    {
      "epoch": 10.75,
      "learning_rate": 2.677906050955414e-05,
      "loss": 2.9864,
      "step": 13500
    },
    {
      "epoch": 11.15,
      "learning_rate": 2.6659633757961786e-05,
      "loss": 2.3945,
      "step": 14000
    },
    {
      "epoch": 11.54,
      "learning_rate": 2.6540207006369426e-05,
      "loss": 2.4969,
      "step": 14500
    },
    {
      "epoch": 11.94,
      "learning_rate": 2.6421019108280255e-05,
      "loss": 3.4386,
      "step": 15000
    },
    {
      "epoch": 12.34,
      "learning_rate": 2.63015923566879e-05,
      "loss": 2.1529,
      "step": 15500
    },
    {
      "epoch": 12.74,
      "learning_rate": 2.6182165605095542e-05,
      "loss": 2.4111,
      "step": 16000
    },
    {
      "epoch": 13.14,
      "learning_rate": 2.6062738853503186e-05,
      "loss": 2.1625,
      "step": 16500
    },
    {
      "epoch": 13.54,
      "learning_rate": 2.594355095541401e-05,
      "loss": 2.221,
      "step": 17000
    },
    {
      "epoch": 13.93,
      "learning_rate": 2.5824124203821655e-05,
      "loss": 2.3075,
      "step": 17500
    },
    {
      "epoch": 14.33,
      "learning_rate": 2.57046974522293e-05,
      "loss": 2.0438,
      "step": 18000
    },
    {
      "epoch": 14.73,
      "learning_rate": 2.5585270700636945e-05,
      "loss": 2.2311,
      "step": 18500
    },
    {
      "epoch": 15.13,
      "learning_rate": 2.5465843949044585e-05,
      "loss": 2.3121,
      "step": 19000
    },
    {
      "epoch": 15.53,
      "learning_rate": 2.5346417197452228e-05,
      "loss": 1.8979,
      "step": 19500
    },
    {
      "epoch": 15.92,
      "learning_rate": 2.5226990445859875e-05,
      "loss": 2.0793,
      "step": 20000
    },
    {
      "epoch": 16.32,
      "learning_rate": 2.5107563694267518e-05,
      "loss": 2.4946,
      "step": 20500
    },
    {
      "epoch": 16.72,
      "learning_rate": 2.4988136942675158e-05,
      "loss": 2.6039,
      "step": 21000
    },
    {
      "epoch": 17.12,
      "learning_rate": 2.4868949044585987e-05,
      "loss": 2.3325,
      "step": 21500
    },
    {
      "epoch": 17.52,
      "learning_rate": 2.4749761146496816e-05,
      "loss": 2.5405,
      "step": 22000
    },
    {
      "epoch": 17.91,
      "learning_rate": 2.463033439490446e-05,
      "loss": 2.2666,
      "step": 22500
    },
    {
      "epoch": 18.31,
      "learning_rate": 2.4510907643312103e-05,
      "loss": 1.8855,
      "step": 23000
    },
    {
      "epoch": 18.71,
      "learning_rate": 2.439171974522293e-05,
      "loss": 2.5188,
      "step": 23500
    },
    {
      "epoch": 19.11,
      "learning_rate": 2.4272292993630572e-05,
      "loss": 2.0856,
      "step": 24000
    },
    {
      "epoch": 19.51,
      "learning_rate": 2.415286624203822e-05,
      "loss": 2.04,
      "step": 24500
    },
    {
      "epoch": 19.9,
      "learning_rate": 2.4033439490445862e-05,
      "loss": 2.0085,
      "step": 25000
    },
    {
      "epoch": 20.3,
      "learning_rate": 2.3914012738853502e-05,
      "loss": 2.2144,
      "step": 25500
    },
    {
      "epoch": 20.7,
      "learning_rate": 2.3794585987261145e-05,
      "loss": 2.1031,
      "step": 26000
    },
    {
      "epoch": 21.1,
      "learning_rate": 2.3675159235668792e-05,
      "loss": 2.1336,
      "step": 26500
    },
    {
      "epoch": 21.5,
      "learning_rate": 2.3555732484076436e-05,
      "loss": 2.3206,
      "step": 27000
    },
    {
      "epoch": 21.89,
      "learning_rate": 2.3436783439490447e-05,
      "loss": 2.0794,
      "step": 27500
    },
    {
      "epoch": 22.29,
      "learning_rate": 2.3317356687898087e-05,
      "loss": 2.0694,
      "step": 28000
    },
    {
      "epoch": 22.69,
      "learning_rate": 2.3197929936305734e-05,
      "loss": 2.0866,
      "step": 28500
    },
    {
      "epoch": 23.09,
      "learning_rate": 2.3078503184713377e-05,
      "loss": 2.2699,
      "step": 29000
    },
    {
      "epoch": 23.49,
      "learning_rate": 2.295907643312102e-05,
      "loss": 2.0621,
      "step": 29500
    },
    {
      "epoch": 23.89,
      "learning_rate": 2.283964968152866e-05,
      "loss": 2.1662,
      "step": 30000
    },
    {
      "epoch": 24.28,
      "learning_rate": 2.2720222929936307e-05,
      "loss": 1.9482,
      "step": 30500
    },
    {
      "epoch": 24.68,
      "learning_rate": 2.260079617834395e-05,
      "loss": 1.9606,
      "step": 31000
    },
    {
      "epoch": 25.08,
      "learning_rate": 2.2481369426751594e-05,
      "loss": 2.2075,
      "step": 31500
    },
    {
      "epoch": 25.48,
      "learning_rate": 2.2361942675159234e-05,
      "loss": 1.9507,
      "step": 32000
    },
    {
      "epoch": 25.88,
      "learning_rate": 2.224251592356688e-05,
      "loss": 1.99,
      "step": 32500
    },
    {
      "epoch": 26.27,
      "learning_rate": 2.2123089171974524e-05,
      "loss": 2.0022,
      "step": 33000
    },
    {
      "epoch": 26.67,
      "learning_rate": 2.2003662420382167e-05,
      "loss": 2.0491,
      "step": 33500
    },
    {
      "epoch": 27.07,
      "learning_rate": 2.1884235668789807e-05,
      "loss": 2.1034,
      "step": 34000
    },
    {
      "epoch": 27.47,
      "learning_rate": 2.1765047770700636e-05,
      "loss": 2.0613,
      "step": 34500
    },
    {
      "epoch": 27.87,
      "learning_rate": 2.164562101910828e-05,
      "loss": 2.0392,
      "step": 35000
    },
    {
      "epoch": 28.26,
      "learning_rate": 2.1526194267515926e-05,
      "loss": 1.9726,
      "step": 35500
    },
    {
      "epoch": 28.66,
      "learning_rate": 2.1406767515923566e-05,
      "loss": 1.6027,
      "step": 36000
    },
    {
      "epoch": 29.06,
      "learning_rate": 2.128734076433121e-05,
      "loss": 2.5043,
      "step": 36500
    },
    {
      "epoch": 29.46,
      "learning_rate": 2.1167914012738853e-05,
      "loss": 2.111,
      "step": 37000
    },
    {
      "epoch": 29.86,
      "learning_rate": 2.10484872611465e-05,
      "loss": 2.0965,
      "step": 37500
    },
    {
      "epoch": 30.25,
      "learning_rate": 2.092906050955414e-05,
      "loss": 1.9389,
      "step": 38000
    },
    {
      "epoch": 30.65,
      "learning_rate": 2.0809633757961783e-05,
      "loss": 2.1853,
      "step": 38500
    },
    {
      "epoch": 31.05,
      "learning_rate": 2.0690207006369427e-05,
      "loss": 1.946,
      "step": 39000
    },
    {
      "epoch": 31.45,
      "learning_rate": 2.0570780254777073e-05,
      "loss": 2.0071,
      "step": 39500
    },
    {
      "epoch": 31.85,
      "learning_rate": 2.0451831210191085e-05,
      "loss": 2.2209,
      "step": 40000
    },
    {
      "epoch": 32.25,
      "learning_rate": 2.0332404458598725e-05,
      "loss": 2.0641,
      "step": 40500
    },
    {
      "epoch": 32.64,
      "learning_rate": 2.021297770700637e-05,
      "loss": 1.9674,
      "step": 41000
    },
    {
      "epoch": 33.04,
      "learning_rate": 2.0093550955414015e-05,
      "loss": 2.0169,
      "step": 41500
    },
    {
      "epoch": 33.44,
      "learning_rate": 1.9974124203821658e-05,
      "loss": 2.0076,
      "step": 42000
    },
    {
      "epoch": 33.84,
      "learning_rate": 1.9854936305732484e-05,
      "loss": 2.2492,
      "step": 42500
    },
    {
      "epoch": 34.24,
      "learning_rate": 1.9735509554140127e-05,
      "loss": 1.7799,
      "step": 43000
    },
    {
      "epoch": 34.63,
      "learning_rate": 1.9616321656050956e-05,
      "loss": 2.1029,
      "step": 43500
    },
    {
      "epoch": 35.03,
      "learning_rate": 1.94968949044586e-05,
      "loss": 1.8489,
      "step": 44000
    },
    {
      "epoch": 35.43,
      "learning_rate": 1.937770700636943e-05,
      "loss": 1.6914,
      "step": 44500
    },
    {
      "epoch": 35.83,
      "learning_rate": 1.925828025477707e-05,
      "loss": 2.4834,
      "step": 45000
    },
    {
      "epoch": 36.23,
      "learning_rate": 1.9138853503184712e-05,
      "loss": 1.7828,
      "step": 45500
    },
    {
      "epoch": 36.62,
      "learning_rate": 1.901942675159236e-05,
      "loss": 1.7454,
      "step": 46000
    },
    {
      "epoch": 37.02,
      "learning_rate": 1.8900000000000002e-05,
      "loss": 2.2061,
      "step": 46500
    },
    {
      "epoch": 37.42,
      "learning_rate": 1.8780573248407642e-05,
      "loss": 1.9779,
      "step": 47000
    },
    {
      "epoch": 37.82,
      "learning_rate": 1.8661146496815285e-05,
      "loss": 1.9194,
      "step": 47500
    },
    {
      "epoch": 38.22,
      "learning_rate": 1.8541719745222932e-05,
      "loss": 1.9433,
      "step": 48000
    },
    {
      "epoch": 38.61,
      "learning_rate": 1.8422292993630575e-05,
      "loss": 1.9242,
      "step": 48500
    },
    {
      "epoch": 39.01,
      "learning_rate": 1.8302866242038215e-05,
      "loss": 1.9324,
      "step": 49000
    },
    {
      "epoch": 39.41,
      "learning_rate": 1.818343949044586e-05,
      "loss": 1.9326,
      "step": 49500
    },
    {
      "epoch": 39.81,
      "learning_rate": 1.8064012738853506e-05,
      "loss": 1.8975,
      "step": 50000
    },
    {
      "epoch": 40.21,
      "learning_rate": 1.794458598726115e-05,
      "loss": 2.0671,
      "step": 50500
    },
    {
      "epoch": 40.61,
      "learning_rate": 1.782515923566879e-05,
      "loss": 2.209,
      "step": 51000
    },
    {
      "epoch": 41.0,
      "learning_rate": 1.7705971337579618e-05,
      "loss": 1.9638,
      "step": 51500
    },
    {
      "epoch": 41.4,
      "learning_rate": 1.758654458598726e-05,
      "loss": 1.6372,
      "step": 52000
    },
    {
      "epoch": 41.8,
      "learning_rate": 1.7467117834394905e-05,
      "loss": 2.1012,
      "step": 52500
    },
    {
      "epoch": 42.2,
      "learning_rate": 1.7347691082802548e-05,
      "loss": 2.1207,
      "step": 53000
    },
    {
      "epoch": 42.6,
      "learning_rate": 1.722826433121019e-05,
      "loss": 1.8135,
      "step": 53500
    },
    {
      "epoch": 42.99,
      "learning_rate": 1.7108837579617835e-05,
      "loss": 2.1076,
      "step": 54000
    },
    {
      "epoch": 43.39,
      "learning_rate": 1.6989410828025478e-05,
      "loss": 1.8442,
      "step": 54500
    },
    {
      "epoch": 43.79,
      "learning_rate": 1.686998407643312e-05,
      "loss": 1.8598,
      "step": 55000
    },
    {
      "epoch": 44.19,
      "learning_rate": 1.6750557324840765e-05,
      "loss": 1.8612,
      "step": 55500
    },
    {
      "epoch": 44.59,
      "learning_rate": 1.6631369426751594e-05,
      "loss": 1.7965,
      "step": 56000
    },
    {
      "epoch": 44.98,
      "learning_rate": 1.6511942675159237e-05,
      "loss": 1.8743,
      "step": 56500
    },
    {
      "epoch": 45.38,
      "learning_rate": 1.6392515923566877e-05,
      "loss": 1.8581,
      "step": 57000
    },
    {
      "epoch": 45.78,
      "learning_rate": 1.627308917197452e-05,
      "loss": 1.8212,
      "step": 57500
    },
    {
      "epoch": 46.18,
      "learning_rate": 1.6153662420382167e-05,
      "loss": 1.7475,
      "step": 58000
    },
    {
      "epoch": 46.58,
      "learning_rate": 1.603423566878981e-05,
      "loss": 1.5722,
      "step": 58500
    },
    {
      "epoch": 46.97,
      "learning_rate": 1.591480891719745e-05,
      "loss": 1.7929,
      "step": 59000
    },
    {
      "epoch": 47.37,
      "learning_rate": 1.5795382165605094e-05,
      "loss": 1.7854,
      "step": 59500
    },
    {
      "epoch": 47.77,
      "learning_rate": 1.5676194267515923e-05,
      "loss": 1.6686,
      "step": 60000
    },
    {
      "epoch": 48.17,
      "learning_rate": 1.5556767515923566e-05,
      "loss": 1.8969,
      "step": 60500
    },
    {
      "epoch": 48.57,
      "learning_rate": 1.543734076433121e-05,
      "loss": 1.6989,
      "step": 61000
    },
    {
      "epoch": 48.96,
      "learning_rate": 1.5317914012738853e-05,
      "loss": 1.9204,
      "step": 61500
    },
    {
      "epoch": 49.36,
      "learning_rate": 1.5198726114649682e-05,
      "loss": 1.7297,
      "step": 62000
    },
    {
      "epoch": 49.76,
      "learning_rate": 1.5079299363057326e-05,
      "loss": 1.6225,
      "step": 62500
    },
    {
      "epoch": 50.16,
      "learning_rate": 1.4959872611464969e-05,
      "loss": 1.8492,
      "step": 63000
    },
    {
      "epoch": 50.56,
      "learning_rate": 1.4840445859872612e-05,
      "loss": 1.948,
      "step": 63500
    },
    {
      "epoch": 50.96,
      "learning_rate": 1.472125796178344e-05,
      "loss": 1.4926,
      "step": 64000
    },
    {
      "epoch": 51.35,
      "learning_rate": 1.4601831210191083e-05,
      "loss": 1.8296,
      "step": 64500
    },
    {
      "epoch": 51.75,
      "learning_rate": 1.4482404458598726e-05,
      "loss": 1.672,
      "step": 65000
    },
    {
      "epoch": 52.15,
      "learning_rate": 1.436297770700637e-05,
      "loss": 1.7793,
      "step": 65500
    },
    {
      "epoch": 52.55,
      "learning_rate": 1.4243789808917199e-05,
      "loss": 1.6569,
      "step": 66000
    },
    {
      "epoch": 52.95,
      "learning_rate": 1.412436305732484e-05,
      "loss": 2.2555,
      "step": 66500
    },
    {
      "epoch": 53.34,
      "learning_rate": 1.4004936305732486e-05,
      "loss": 1.7448,
      "step": 67000
    },
    {
      "epoch": 53.74,
      "learning_rate": 1.3885509554140127e-05,
      "loss": 1.8515,
      "step": 67500
    },
    {
      "epoch": 54.14,
      "learning_rate": 1.3766321656050956e-05,
      "loss": 1.5126,
      "step": 68000
    },
    {
      "epoch": 54.54,
      "learning_rate": 1.3646894904458598e-05,
      "loss": 1.8567,
      "step": 68500
    },
    {
      "epoch": 54.94,
      "learning_rate": 1.3527468152866243e-05,
      "loss": 1.8848,
      "step": 69000
    },
    {
      "epoch": 55.33,
      "learning_rate": 1.340828025477707e-05,
      "loss": 1.6216,
      "step": 69500
    },
    {
      "epoch": 55.73,
      "learning_rate": 1.3288853503184714e-05,
      "loss": 2.0117,
      "step": 70000
    },
    {
      "epoch": 56.13,
      "learning_rate": 1.3169426751592357e-05,
      "loss": 1.8113,
      "step": 70500
    },
    {
      "epoch": 56.53,
      "learning_rate": 1.305e-05,
      "loss": 1.7053,
      "step": 71000
    },
    {
      "epoch": 56.93,
      "learning_rate": 1.2930573248407644e-05,
      "loss": 1.7271,
      "step": 71500
    },
    {
      "epoch": 57.32,
      "learning_rate": 1.2811385350318471e-05,
      "loss": 1.6382,
      "step": 72000
    },
    {
      "epoch": 57.72,
      "learning_rate": 1.2691958598726116e-05,
      "loss": 1.5688,
      "step": 72500
    },
    {
      "epoch": 58.12,
      "learning_rate": 1.2572531847133758e-05,
      "loss": 1.6947,
      "step": 73000
    },
    {
      "epoch": 58.52,
      "learning_rate": 1.2453105095541403e-05,
      "loss": 1.5709,
      "step": 73500
    },
    {
      "epoch": 58.92,
      "learning_rate": 1.2333678343949045e-05,
      "loss": 1.8282,
      "step": 74000
    },
    {
      "epoch": 59.32,
      "learning_rate": 1.221425159235669e-05,
      "loss": 1.6447,
      "step": 74500
    },
    {
      "epoch": 59.71,
      "learning_rate": 1.2095063694267515e-05,
      "loss": 2.1066,
      "step": 75000
    },
    {
      "epoch": 60.11,
      "learning_rate": 1.197563694267516e-05,
      "loss": 1.7348,
      "step": 75500
    },
    {
      "epoch": 60.51,
      "learning_rate": 1.1856210191082802e-05,
      "loss": 1.5,
      "step": 76000
    },
    {
      "epoch": 60.91,
      "learning_rate": 1.1736783439490447e-05,
      "loss": 1.8146,
      "step": 76500
    },
    {
      "epoch": 61.31,
      "learning_rate": 1.1617356687898089e-05,
      "loss": 1.7271,
      "step": 77000
    },
    {
      "epoch": 61.7,
      "learning_rate": 1.1497929936305734e-05,
      "loss": 1.4879,
      "step": 77500
    },
    {
      "epoch": 62.1,
      "learning_rate": 1.1378503184713375e-05,
      "loss": 1.7706,
      "step": 78000
    },
    {
      "epoch": 62.5,
      "learning_rate": 1.125907643312102e-05,
      "loss": 1.7463,
      "step": 78500
    },
    {
      "epoch": 62.9,
      "learning_rate": 1.1139888535031846e-05,
      "loss": 1.4587,
      "step": 79000
    },
    {
      "epoch": 63.3,
      "learning_rate": 1.1020461783439491e-05,
      "loss": 1.754,
      "step": 79500
    },
    {
      "epoch": 63.69,
      "learning_rate": 1.0901035031847133e-05,
      "loss": 1.6684,
      "step": 80000
    },
    {
      "epoch": 64.09,
      "learning_rate": 1.0781847133757962e-05,
      "loss": 1.4427,
      "step": 80500
    },
    {
      "epoch": 64.49,
      "learning_rate": 1.0662420382165605e-05,
      "loss": 1.5719,
      "step": 81000
    },
    {
      "epoch": 64.89,
      "learning_rate": 1.0542993630573249e-05,
      "loss": 1.7179,
      "step": 81500
    },
    {
      "epoch": 65.29,
      "learning_rate": 1.0423566878980892e-05,
      "loss": 1.6181,
      "step": 82000
    },
    {
      "epoch": 65.68,
      "learning_rate": 1.0304140127388535e-05,
      "loss": 1.4319,
      "step": 82500
    },
    {
      "epoch": 66.08,
      "learning_rate": 1.0185191082802548e-05,
      "loss": 1.78,
      "step": 83000
    },
    {
      "epoch": 66.48,
      "learning_rate": 1.0065764331210192e-05,
      "loss": 1.5097,
      "step": 83500
    },
    {
      "epoch": 66.88,
      "learning_rate": 9.946337579617835e-06,
      "loss": 1.6796,
      "step": 84000
    },
    {
      "epoch": 67.28,
      "learning_rate": 9.826910828025479e-06,
      "loss": 1.7987,
      "step": 84500
    },
    {
      "epoch": 67.68,
      "learning_rate": 9.707484076433122e-06,
      "loss": 1.5529,
      "step": 85000
    },
    {
      "epoch": 68.07,
      "learning_rate": 9.588057324840764e-06,
      "loss": 1.7102,
      "step": 85500
    },
    {
      "epoch": 68.47,
      "learning_rate": 9.468630573248409e-06,
      "loss": 1.7089,
      "step": 86000
    },
    {
      "epoch": 68.87,
      "learning_rate": 9.34920382165605e-06,
      "loss": 1.5696,
      "step": 86500
    },
    {
      "epoch": 69.27,
      "learning_rate": 9.229777070063695e-06,
      "loss": 1.4092,
      "step": 87000
    },
    {
      "epoch": 69.67,
      "learning_rate": 9.110589171974523e-06,
      "loss": 1.7112,
      "step": 87500
    },
    {
      "epoch": 70.06,
      "learning_rate": 8.991162420382166e-06,
      "loss": 1.6769,
      "step": 88000
    },
    {
      "epoch": 70.46,
      "learning_rate": 8.87173566878981e-06,
      "loss": 1.7474,
      "step": 88500
    },
    {
      "epoch": 70.86,
      "learning_rate": 8.752547770700637e-06,
      "loss": 1.3906,
      "step": 89000
    },
    {
      "epoch": 71.26,
      "learning_rate": 8.63312101910828e-06,
      "loss": 1.6714,
      "step": 89500
    },
    {
      "epoch": 71.66,
      "learning_rate": 8.513694267515923e-06,
      "loss": 1.6018,
      "step": 90000
    },
    {
      "epoch": 72.05,
      "learning_rate": 8.394267515923567e-06,
      "loss": 1.4916,
      "step": 90500
    },
    {
      "epoch": 72.45,
      "learning_rate": 8.27484076433121e-06,
      "loss": 1.8035,
      "step": 91000
    },
    {
      "epoch": 72.85,
      "learning_rate": 8.155414012738854e-06,
      "loss": 1.3387,
      "step": 91500
    },
    {
      "epoch": 73.25,
      "learning_rate": 8.035987261146497e-06,
      "loss": 1.5553,
      "step": 92000
    },
    {
      "epoch": 73.65,
      "learning_rate": 7.91656050955414e-06,
      "loss": 1.5534,
      "step": 92500
    },
    {
      "epoch": 74.04,
      "learning_rate": 7.797133757961784e-06,
      "loss": 1.3926,
      "step": 93000
    },
    {
      "epoch": 74.44,
      "learning_rate": 7.677707006369427e-06,
      "loss": 1.5332,
      "step": 93500
    },
    {
      "epoch": 74.84,
      "learning_rate": 7.5582802547770704e-06,
      "loss": 1.5935,
      "step": 94000
    },
    {
      "epoch": 75.24,
      "learning_rate": 7.438853503184713e-06,
      "loss": 1.6671,
      "step": 94500
    },
    {
      "epoch": 75.64,
      "learning_rate": 7.319665605095542e-06,
      "loss": 1.662,
      "step": 95000
    },
    {
      "epoch": 76.04,
      "learning_rate": 7.200238853503185e-06,
      "loss": 1.6435,
      "step": 95500
    },
    {
      "epoch": 76.43,
      "learning_rate": 7.080812101910829e-06,
      "loss": 1.5282,
      "step": 96000
    },
    {
      "epoch": 76.83,
      "learning_rate": 6.961385350318472e-06,
      "loss": 1.6214,
      "step": 96500
    },
    {
      "epoch": 77.23,
      "learning_rate": 6.842197452229299e-06,
      "loss": 1.2999,
      "step": 97000
    },
    {
      "epoch": 77.63,
      "learning_rate": 6.722770700636943e-06,
      "loss": 1.4861,
      "step": 97500
    },
    {
      "epoch": 78.03,
      "learning_rate": 6.603343949044586e-06,
      "loss": 1.493,
      "step": 98000
    },
    {
      "epoch": 78.42,
      "learning_rate": 6.484156050955414e-06,
      "loss": 1.5782,
      "step": 98500
    },
    {
      "epoch": 78.82,
      "learning_rate": 6.364729299363058e-06,
      "loss": 1.6043,
      "step": 99000
    },
    {
      "epoch": 79.22,
      "learning_rate": 6.245302547770701e-06,
      "loss": 1.7383,
      "step": 99500
    },
    {
      "epoch": 79.62,
      "learning_rate": 6.1258757961783444e-06,
      "loss": 1.5453,
      "step": 100000
    },
    {
      "epoch": 80.02,
      "learning_rate": 6.006449044585987e-06,
      "loss": 1.6063,
      "step": 100500
    },
    {
      "epoch": 80.41,
      "learning_rate": 5.88702229299363e-06,
      "loss": 1.5471,
      "step": 101000
    },
    {
      "epoch": 80.81,
      "learning_rate": 5.767595541401274e-06,
      "loss": 1.6526,
      "step": 101500
    },
    {
      "epoch": 81.21,
      "learning_rate": 5.648168789808917e-06,
      "loss": 1.4817,
      "step": 102000
    },
    {
      "epoch": 81.61,
      "learning_rate": 5.528980891719745e-06,
      "loss": 1.3771,
      "step": 102500
    },
    {
      "epoch": 82.01,
      "learning_rate": 5.409554140127389e-06,
      "loss": 1.5052,
      "step": 103000
    },
    {
      "epoch": 82.4,
      "learning_rate": 5.290127388535032e-06,
      "loss": 1.7024,
      "step": 103500
    },
    {
      "epoch": 82.8,
      "learning_rate": 5.170700636942675e-06,
      "loss": 1.6985,
      "step": 104000
    },
    {
      "epoch": 83.2,
      "learning_rate": 5.051512738853503e-06,
      "loss": 1.5487,
      "step": 104500
    },
    {
      "epoch": 83.6,
      "learning_rate": 4.932085987261146e-06,
      "loss": 1.3429,
      "step": 105000
    },
    {
      "epoch": 84.0,
      "learning_rate": 4.812659235668789e-06,
      "loss": 1.6937,
      "step": 105500
    },
    {
      "epoch": 84.39,
      "learning_rate": 4.6934713375796184e-06,
      "loss": 1.5639,
      "step": 106000
    },
    {
      "epoch": 84.79,
      "learning_rate": 4.574044585987262e-06,
      "loss": 1.5466,
      "step": 106500
    },
    {
      "epoch": 85.19,
      "learning_rate": 4.454617834394905e-06,
      "loss": 1.4726,
      "step": 107000
    },
    {
      "epoch": 85.59,
      "learning_rate": 4.3351910828025485e-06,
      "loss": 1.5633,
      "step": 107500
    },
    {
      "epoch": 85.99,
      "learning_rate": 4.215764331210192e-06,
      "loss": 1.5687,
      "step": 108000
    },
    {
      "epoch": 86.39,
      "learning_rate": 4.096337579617834e-06,
      "loss": 1.2633,
      "step": 108500
    },
    {
      "epoch": 86.78,
      "learning_rate": 3.976910828025478e-06,
      "loss": 1.5035,
      "step": 109000
    },
    {
      "epoch": 87.18,
      "learning_rate": 3.857484076433121e-06,
      "loss": 1.6182,
      "step": 109500
    },
    {
      "epoch": 87.58,
      "learning_rate": 3.7380573248407645e-06,
      "loss": 1.6295,
      "step": 110000
    },
    {
      "epoch": 87.98,
      "learning_rate": 3.618630573248408e-06,
      "loss": 1.5837,
      "step": 110500
    },
    {
      "epoch": 88.38,
      "learning_rate": 3.4992038216560512e-06,
      "loss": 1.3664,
      "step": 111000
    },
    {
      "epoch": 88.77,
      "learning_rate": 3.3797770700636946e-06,
      "loss": 1.6751,
      "step": 111500
    },
    {
      "epoch": 89.17,
      "learning_rate": 3.2605891719745224e-06,
      "loss": 1.5701,
      "step": 112000
    },
    {
      "epoch": 89.57,
      "learning_rate": 3.1411624203821653e-06,
      "loss": 1.4553,
      "step": 112500
    },
    {
      "epoch": 89.97,
      "learning_rate": 3.0217356687898087e-06,
      "loss": 1.4953,
      "step": 113000
    },
    {
      "epoch": 90.37,
      "learning_rate": 2.902308917197452e-06,
      "loss": 1.6721,
      "step": 113500
    },
    {
      "epoch": 90.76,
      "learning_rate": 2.7831210191082802e-06,
      "loss": 1.4171,
      "step": 114000
    },
    {
      "epoch": 91.16,
      "learning_rate": 2.6636942675159236e-06,
      "loss": 1.8271,
      "step": 114500
    },
    {
      "epoch": 91.56,
      "learning_rate": 2.544267515923567e-06,
      "loss": 1.6508,
      "step": 115000
    },
    {
      "epoch": 91.96,
      "learning_rate": 2.4248407643312103e-06,
      "loss": 1.5092,
      "step": 115500
    },
    {
      "epoch": 92.36,
      "learning_rate": 2.305652866242038e-06,
      "loss": 1.5075,
      "step": 116000
    },
    {
      "epoch": 92.75,
      "learning_rate": 2.1864649681528663e-06,
      "loss": 1.5702,
      "step": 116500
    },
    {
      "epoch": 93.15,
      "learning_rate": 2.0670382165605097e-06,
      "loss": 1.5646,
      "step": 117000
    },
    {
      "epoch": 93.55,
      "learning_rate": 1.947611464968153e-06,
      "loss": 1.4956,
      "step": 117500
    },
    {
      "epoch": 93.95,
      "learning_rate": 1.8281847133757964e-06,
      "loss": 1.5299,
      "step": 118000
    },
    {
      "epoch": 94.35,
      "learning_rate": 1.7087579617834395e-06,
      "loss": 1.5027,
      "step": 118500
    },
    {
      "epoch": 94.75,
      "learning_rate": 1.5893312101910827e-06,
      "loss": 1.5207,
      "step": 119000
    },
    {
      "epoch": 95.14,
      "learning_rate": 1.469904458598726e-06,
      "loss": 1.3726,
      "step": 119500
    },
    {
      "epoch": 95.54,
      "learning_rate": 1.3504777070063694e-06,
      "loss": 1.3402,
      "step": 120000
    },
    {
      "epoch": 95.94,
      "learning_rate": 1.2312898089171974e-06,
      "loss": 1.7752,
      "step": 120500
    },
    {
      "epoch": 96.34,
      "learning_rate": 1.1121019108280256e-06,
      "loss": 1.6895,
      "step": 121000
    },
    {
      "epoch": 96.74,
      "learning_rate": 9.926751592356687e-07,
      "loss": 1.633,
      "step": 121500
    },
    {
      "epoch": 97.13,
      "learning_rate": 8.732484076433121e-07,
      "loss": 1.6033,
      "step": 122000
    },
    {
      "epoch": 97.53,
      "learning_rate": 7.538216560509554e-07,
      "loss": 1.3726,
      "step": 122500
    },
    {
      "epoch": 97.93,
      "learning_rate": 6.343949044585987e-07,
      "loss": 1.3725,
      "step": 123000
    },
    {
      "epoch": 98.33,
      "learning_rate": 5.149681528662421e-07,
      "loss": 1.5944,
      "step": 123500
    },
    {
      "epoch": 98.73,
      "learning_rate": 3.9554140127388536e-07,
      "loss": 1.466,
      "step": 124000
    },
    {
      "epoch": 99.12,
      "learning_rate": 2.7611464968152867e-07,
      "loss": 1.421,
      "step": 124500
    },
    {
      "epoch": 99.52,
      "learning_rate": 1.569267515923567e-07,
      "loss": 1.6345,
      "step": 125000
    },
    {
      "epoch": 99.92,
      "learning_rate": 3.7500000000000005e-08,
      "loss": 1.5021,
      "step": 125500
    },
    {
      "epoch": 100.0,
      "step": 125600,
      "total_flos": 5.249087478816768e+17,
      "train_loss": 1.9559951608499904,
      "train_runtime": 50817.2182,
      "train_samples_per_second": 14.83,
      "train_steps_per_second": 2.472
    }
  ],
  "max_steps": 125600,
  "num_train_epochs": 100,
  "total_flos": 5.249087478816768e+17,
  "trial_name": null,
  "trial_params": null
}