|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9976812782939256, |
|
"global_step": 126500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9921131914759374e-05, |
|
"loss": 1.2922, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.984226382951875e-05, |
|
"loss": 1.3127, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.976339574427812e-05, |
|
"loss": 1.293, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9684527659037496e-05, |
|
"loss": 1.2856, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9605659573796868e-05, |
|
"loss": 1.3064, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9526791488556244e-05, |
|
"loss": 1.3068, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9447923403315616e-05, |
|
"loss": 1.2982, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.936905531807499e-05, |
|
"loss": 1.303, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9290187232834363e-05, |
|
"loss": 1.3007, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9211319147593738e-05, |
|
"loss": 1.3172, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.913245106235311e-05, |
|
"loss": 1.325, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9053582977112482e-05, |
|
"loss": 1.2849, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8974714891871857e-05, |
|
"loss": 1.3189, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.889584680663123e-05, |
|
"loss": 1.2955, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8816978721390605e-05, |
|
"loss": 1.3496, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8738110636149977e-05, |
|
"loss": 1.3103, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8659242550909352e-05, |
|
"loss": 1.2921, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8580374465668724e-05, |
|
"loss": 1.303, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.85015063804281e-05, |
|
"loss": 1.2997, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.842263829518747e-05, |
|
"loss": 1.2692, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8343770209946847e-05, |
|
"loss": 1.2915, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.826490212470622e-05, |
|
"loss": 1.2907, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.818603403946559e-05, |
|
"loss": 1.3233, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8107165954224966e-05, |
|
"loss": 1.3047, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8028297868984338e-05, |
|
"loss": 1.273, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7949429783743713e-05, |
|
"loss": 1.3118, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7870561698503085e-05, |
|
"loss": 1.3183, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.779169361326246e-05, |
|
"loss": 1.304, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7712825528021833e-05, |
|
"loss": 1.3409, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7633957442781208e-05, |
|
"loss": 1.3068, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.755508935754058e-05, |
|
"loss": 1.3038, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7476221272299955e-05, |
|
"loss": 1.3331, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7397353187059327e-05, |
|
"loss": 1.2997, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.73184851018187e-05, |
|
"loss": 1.2833, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.723961701657807e-05, |
|
"loss": 1.3178, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7160748931337447e-05, |
|
"loss": 1.3263, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.708188084609682e-05, |
|
"loss": 1.2907, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7003012760856194e-05, |
|
"loss": 1.3328, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6924144675615566e-05, |
|
"loss": 1.3268, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6845276590374938e-05, |
|
"loss": 1.3161, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6766408505134313e-05, |
|
"loss": 1.3026, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6687540419893685e-05, |
|
"loss": 1.3163, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.660867233465306e-05, |
|
"loss": 1.3202, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6529804249412432e-05, |
|
"loss": 1.2872, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6450936164171808e-05, |
|
"loss": 1.3209, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.637206807893118e-05, |
|
"loss": 1.3165, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6293199993690555e-05, |
|
"loss": 1.3026, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6214331908449927e-05, |
|
"loss": 1.3313, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6135463823209302e-05, |
|
"loss": 1.3123, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6056595737968674e-05, |
|
"loss": 1.3124, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5977727652728046e-05, |
|
"loss": 1.3227, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.589885956748742e-05, |
|
"loss": 1.316, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5819991482246794e-05, |
|
"loss": 1.2764, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.574112339700617e-05, |
|
"loss": 1.3096, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.566225531176554e-05, |
|
"loss": 1.3499, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5583387226524916e-05, |
|
"loss": 1.3339, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5504519141284288e-05, |
|
"loss": 1.3423, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5425651056043664e-05, |
|
"loss": 1.3295, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5346782970803036e-05, |
|
"loss": 1.3226, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.526791488556241e-05, |
|
"loss": 1.2861, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5189046800321783e-05, |
|
"loss": 1.3504, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5110178715081157e-05, |
|
"loss": 1.3619, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.503131062984053e-05, |
|
"loss": 1.329, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4952442544599904e-05, |
|
"loss": 1.2826, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4873574459359277e-05, |
|
"loss": 1.3075, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4794706374118651e-05, |
|
"loss": 1.3293, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4715838288878023e-05, |
|
"loss": 1.2638, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4636970203637397e-05, |
|
"loss": 1.3282, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.455810211839677e-05, |
|
"loss": 1.3562, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4479234033156144e-05, |
|
"loss": 1.3446, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4400365947915518e-05, |
|
"loss": 1.3339, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4321497862674891e-05, |
|
"loss": 1.31, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4242629777434265e-05, |
|
"loss": 1.3224, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4163761692193639e-05, |
|
"loss": 1.3715, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4084893606953012e-05, |
|
"loss": 1.3081, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4006025521712386e-05, |
|
"loss": 1.2892, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3927157436471758e-05, |
|
"loss": 1.3046, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3848289351231132e-05, |
|
"loss": 1.3344, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3769421265990505e-05, |
|
"loss": 1.3303, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3690553180749879e-05, |
|
"loss": 1.3237, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3611685095509253e-05, |
|
"loss": 1.3162, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3532817010268626e-05, |
|
"loss": 1.3496, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3453948925028e-05, |
|
"loss": 1.355, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3375080839787374e-05, |
|
"loss": 1.348, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3296212754546747e-05, |
|
"loss": 1.344, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3217344669306121e-05, |
|
"loss": 1.3256, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3138476584065495e-05, |
|
"loss": 1.3724, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3059608498824867e-05, |
|
"loss": 1.2848, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.298074041358424e-05, |
|
"loss": 1.3318, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2901872328343614e-05, |
|
"loss": 1.3025, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2823004243102987e-05, |
|
"loss": 1.2953, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2744136157862361e-05, |
|
"loss": 1.3388, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2665268072621735e-05, |
|
"loss": 1.3444, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2586399987381108e-05, |
|
"loss": 1.3239, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2507531902140482e-05, |
|
"loss": 1.296, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2428663816899856e-05, |
|
"loss": 1.3024, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.234979573165923e-05, |
|
"loss": 1.33, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2270927646418603e-05, |
|
"loss": 1.3103, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2192059561177975e-05, |
|
"loss": 1.3585, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2113191475937349e-05, |
|
"loss": 1.3155, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2034323390696722e-05, |
|
"loss": 1.3307, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1955455305456094e-05, |
|
"loss": 1.3344, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1876587220215468e-05, |
|
"loss": 1.3154, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1797719134974842e-05, |
|
"loss": 1.3233, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1718851049734215e-05, |
|
"loss": 1.2995, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1639982964493587e-05, |
|
"loss": 1.3273, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1561114879252961e-05, |
|
"loss": 1.3384, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1482246794012335e-05, |
|
"loss": 1.3127, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1403378708771708e-05, |
|
"loss": 1.3681, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1324510623531082e-05, |
|
"loss": 1.3499, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1245642538290456e-05, |
|
"loss": 1.3132, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.116677445304983e-05, |
|
"loss": 1.3173, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1087906367809203e-05, |
|
"loss": 1.3366, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1009038282568577e-05, |
|
"loss": 1.3433, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.093017019732795e-05, |
|
"loss": 1.3168, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0851302112087324e-05, |
|
"loss": 1.3489, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0772434026846696e-05, |
|
"loss": 1.3441, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.069356594160607e-05, |
|
"loss": 1.2982, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0614697856365443e-05, |
|
"loss": 1.3138, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0535829771124817e-05, |
|
"loss": 1.3417, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.045696168588419e-05, |
|
"loss": 1.3107, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0378093600643564e-05, |
|
"loss": 1.3524, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0299225515402938e-05, |
|
"loss": 1.3509, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0220357430162311e-05, |
|
"loss": 1.3469, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0141489344921685e-05, |
|
"loss": 1.3231, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0062621259681059e-05, |
|
"loss": 1.3218, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.98375317444043e-06, |
|
"loss": 1.3203, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.904885089199804e-06, |
|
"loss": 1.286, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.826017003959178e-06, |
|
"loss": 1.2927, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.747148918718552e-06, |
|
"loss": 1.3631, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.668280833477925e-06, |
|
"loss": 1.3556, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.589412748237299e-06, |
|
"loss": 1.3153, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.510544662996673e-06, |
|
"loss": 1.3567, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.431676577756046e-06, |
|
"loss": 1.2935, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.35280849251542e-06, |
|
"loss": 1.3497, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.273940407274794e-06, |
|
"loss": 1.3418, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.195072322034167e-06, |
|
"loss": 1.2812, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.11620423679354e-06, |
|
"loss": 1.3112, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.037336151552913e-06, |
|
"loss": 1.3378, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.958468066312287e-06, |
|
"loss": 1.3173, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.87959998107166e-06, |
|
"loss": 1.3013, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.800731895831034e-06, |
|
"loss": 1.3323, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.721863810590407e-06, |
|
"loss": 1.2787, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.642995725349781e-06, |
|
"loss": 1.3173, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.564127640109155e-06, |
|
"loss": 1.3196, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.485259554868528e-06, |
|
"loss": 1.2903, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.406391469627902e-06, |
|
"loss": 1.3292, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.327523384387276e-06, |
|
"loss": 1.3402, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.248655299146648e-06, |
|
"loss": 1.3504, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.169787213906021e-06, |
|
"loss": 1.318, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.090919128665395e-06, |
|
"loss": 1.3544, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.012051043424769e-06, |
|
"loss": 1.2946, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.93318295818414e-06, |
|
"loss": 1.357, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.854314872943514e-06, |
|
"loss": 1.3001, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.775446787702888e-06, |
|
"loss": 1.3008, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.696578702462262e-06, |
|
"loss": 1.3285, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.617710617221635e-06, |
|
"loss": 1.3149, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.538842531981009e-06, |
|
"loss": 1.2988, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.459974446740383e-06, |
|
"loss": 1.3254, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.381106361499755e-06, |
|
"loss": 1.3198, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.302238276259129e-06, |
|
"loss": 1.2625, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.223370191018503e-06, |
|
"loss": 1.3755, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.144502105777876e-06, |
|
"loss": 1.3148, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.06563402053725e-06, |
|
"loss": 1.2799, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.986765935296624e-06, |
|
"loss": 1.3211, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.9078978500559965e-06, |
|
"loss": 1.3325, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.82902976481537e-06, |
|
"loss": 1.2948, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.750161679574744e-06, |
|
"loss": 1.314, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.6712935943341175e-06, |
|
"loss": 1.2739, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.592425509093491e-06, |
|
"loss": 1.3231, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.513557423852864e-06, |
|
"loss": 1.3044, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.434689338612238e-06, |
|
"loss": 1.3177, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.355821253371611e-06, |
|
"loss": 1.3457, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.276953168130985e-06, |
|
"loss": 1.3338, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.1980850828903586e-06, |
|
"loss": 1.3145, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.119216997649731e-06, |
|
"loss": 1.2863, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.040348912409105e-06, |
|
"loss": 1.3334, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.961480827168479e-06, |
|
"loss": 1.2809, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.882612741927852e-06, |
|
"loss": 1.3164, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.803744656687226e-06, |
|
"loss": 1.2591, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.7248765714466e-06, |
|
"loss": 1.3127, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.6460084862059725e-06, |
|
"loss": 1.2838, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.567140400965346e-06, |
|
"loss": 1.2783, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.48827231572472e-06, |
|
"loss": 1.2546, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.4094042304840934e-06, |
|
"loss": 1.3429, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.330536145243467e-06, |
|
"loss": 1.3223, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.251668060002839e-06, |
|
"loss": 1.2925, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.172799974762213e-06, |
|
"loss": 1.2802, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.093931889521586e-06, |
|
"loss": 1.2491, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.01506380428096e-06, |
|
"loss": 1.3079, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.936195719040334e-06, |
|
"loss": 1.2788, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.857327633799707e-06, |
|
"loss": 1.2728, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.778459548559081e-06, |
|
"loss": 1.3208, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.699591463318454e-06, |
|
"loss": 1.3279, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.6207233780778275e-06, |
|
"loss": 1.3523, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.5418552928372e-06, |
|
"loss": 1.3168, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.462987207596574e-06, |
|
"loss": 1.2678, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.384119122355948e-06, |
|
"loss": 1.2477, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.305251037115321e-06, |
|
"loss": 1.3298, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.226382951874695e-06, |
|
"loss": 1.2886, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.147514866634068e-06, |
|
"loss": 1.3194, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.068646781393441e-06, |
|
"loss": 1.3336, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.989778696152815e-06, |
|
"loss": 1.28, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.910910610912189e-06, |
|
"loss": 1.2774, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.832042525671562e-06, |
|
"loss": 1.3041, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7531744404309356e-06, |
|
"loss": 1.3761, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6743063551903092e-06, |
|
"loss": 1.37, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.5954382699496825e-06, |
|
"loss": 1.3055, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.516570184709056e-06, |
|
"loss": 1.3361, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.4377020994684298e-06, |
|
"loss": 1.3031, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.358834014227803e-06, |
|
"loss": 1.3137, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2799659289871767e-06, |
|
"loss": 1.2919, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2010978437465495e-06, |
|
"loss": 1.2852, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.122229758505923e-06, |
|
"loss": 1.2751, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.0433616732652964e-06, |
|
"loss": 1.2906, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.96449358802467e-06, |
|
"loss": 1.3119, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8856255027840437e-06, |
|
"loss": 1.2621, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.806757417543417e-06, |
|
"loss": 1.2994, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7278893323027906e-06, |
|
"loss": 1.2909, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.649021247062164e-06, |
|
"loss": 1.3306, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5701531618215375e-06, |
|
"loss": 1.2881, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.491285076580911e-06, |
|
"loss": 1.306, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.4124169913402844e-06, |
|
"loss": 1.3357, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.333548906099658e-06, |
|
"loss": 1.3152, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.2546808208590317e-06, |
|
"loss": 1.3011, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.175812735618405e-06, |
|
"loss": 1.2949, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.096944650377778e-06, |
|
"loss": 1.2997, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.018076565137152e-06, |
|
"loss": 1.3148, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.939208479896525e-06, |
|
"loss": 1.3464, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8603403946558987e-06, |
|
"loss": 1.2722, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7814723094152721e-06, |
|
"loss": 1.2361, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7026042241746458e-06, |
|
"loss": 1.2932, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6237361389340192e-06, |
|
"loss": 1.2929, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5448680536933925e-06, |
|
"loss": 1.2514, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.465999968452766e-06, |
|
"loss": 1.3133, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3871318832121394e-06, |
|
"loss": 1.2766, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.308263797971513e-06, |
|
"loss": 1.2892, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.2293957127308865e-06, |
|
"loss": 1.3185, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.15052762749026e-06, |
|
"loss": 1.299, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0716595422496333e-06, |
|
"loss": 1.2878, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.927914570090068e-07, |
|
"loss": 1.2827, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.139233717683802e-07, |
|
"loss": 1.3103, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.350552865277538e-07, |
|
"loss": 1.3186, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.561872012871272e-07, |
|
"loss": 1.3046, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.773191160465007e-07, |
|
"loss": 1.2945, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.984510308058741e-07, |
|
"loss": 1.3193, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.195829455652477e-07, |
|
"loss": 1.3048, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.4071486032462107e-07, |
|
"loss": 1.2599, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.6184677508399457e-07, |
|
"loss": 1.2585, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.82978689843368e-07, |
|
"loss": 1.2506, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.0411060460274149e-07, |
|
"loss": 1.2928, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2524251936211493e-07, |
|
"loss": 1.2959, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.63744341214884e-08, |
|
"loss": 1.2957, |
|
"step": 126500 |
|
} |
|
], |
|
"max_steps": 126794, |
|
"num_train_epochs": 1, |
|
"total_flos": 4757862742622208.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|