opus-mt-en-it-finetuned-en-to-it / trainer_state.json
nid989's picture
Upload trainer_state.json
d000461
raw
history blame
31.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9976812782939256,
"global_step": 126500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9921131914759374e-05,
"loss": 1.2922,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.984226382951875e-05,
"loss": 1.3127,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.976339574427812e-05,
"loss": 1.293,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 1.9684527659037496e-05,
"loss": 1.2856,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 1.9605659573796868e-05,
"loss": 1.3064,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 1.9526791488556244e-05,
"loss": 1.3068,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 1.9447923403315616e-05,
"loss": 1.2982,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 1.936905531807499e-05,
"loss": 1.303,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 1.9290187232834363e-05,
"loss": 1.3007,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 1.9211319147593738e-05,
"loss": 1.3172,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 1.913245106235311e-05,
"loss": 1.325,
"step": 5500
},
{
"epoch": 0.05,
"learning_rate": 1.9053582977112482e-05,
"loss": 1.2849,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 1.8974714891871857e-05,
"loss": 1.3189,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 1.889584680663123e-05,
"loss": 1.2955,
"step": 7000
},
{
"epoch": 0.06,
"learning_rate": 1.8816978721390605e-05,
"loss": 1.3496,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 1.8738110636149977e-05,
"loss": 1.3103,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 1.8659242550909352e-05,
"loss": 1.2921,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 1.8580374465668724e-05,
"loss": 1.303,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 1.85015063804281e-05,
"loss": 1.2997,
"step": 9500
},
{
"epoch": 0.08,
"learning_rate": 1.842263829518747e-05,
"loss": 1.2692,
"step": 10000
},
{
"epoch": 0.08,
"learning_rate": 1.8343770209946847e-05,
"loss": 1.2915,
"step": 10500
},
{
"epoch": 0.09,
"learning_rate": 1.826490212470622e-05,
"loss": 1.2907,
"step": 11000
},
{
"epoch": 0.09,
"learning_rate": 1.818603403946559e-05,
"loss": 1.3233,
"step": 11500
},
{
"epoch": 0.09,
"learning_rate": 1.8107165954224966e-05,
"loss": 1.3047,
"step": 12000
},
{
"epoch": 0.1,
"learning_rate": 1.8028297868984338e-05,
"loss": 1.273,
"step": 12500
},
{
"epoch": 0.1,
"learning_rate": 1.7949429783743713e-05,
"loss": 1.3118,
"step": 13000
},
{
"epoch": 0.11,
"learning_rate": 1.7870561698503085e-05,
"loss": 1.3183,
"step": 13500
},
{
"epoch": 0.11,
"learning_rate": 1.779169361326246e-05,
"loss": 1.304,
"step": 14000
},
{
"epoch": 0.11,
"learning_rate": 1.7712825528021833e-05,
"loss": 1.3409,
"step": 14500
},
{
"epoch": 0.12,
"learning_rate": 1.7633957442781208e-05,
"loss": 1.3068,
"step": 15000
},
{
"epoch": 0.12,
"learning_rate": 1.755508935754058e-05,
"loss": 1.3038,
"step": 15500
},
{
"epoch": 0.13,
"learning_rate": 1.7476221272299955e-05,
"loss": 1.3331,
"step": 16000
},
{
"epoch": 0.13,
"learning_rate": 1.7397353187059327e-05,
"loss": 1.2997,
"step": 16500
},
{
"epoch": 0.13,
"learning_rate": 1.73184851018187e-05,
"loss": 1.2833,
"step": 17000
},
{
"epoch": 0.14,
"learning_rate": 1.723961701657807e-05,
"loss": 1.3178,
"step": 17500
},
{
"epoch": 0.14,
"learning_rate": 1.7160748931337447e-05,
"loss": 1.3263,
"step": 18000
},
{
"epoch": 0.15,
"learning_rate": 1.708188084609682e-05,
"loss": 1.2907,
"step": 18500
},
{
"epoch": 0.15,
"learning_rate": 1.7003012760856194e-05,
"loss": 1.3328,
"step": 19000
},
{
"epoch": 0.15,
"learning_rate": 1.6924144675615566e-05,
"loss": 1.3268,
"step": 19500
},
{
"epoch": 0.16,
"learning_rate": 1.6845276590374938e-05,
"loss": 1.3161,
"step": 20000
},
{
"epoch": 0.16,
"learning_rate": 1.6766408505134313e-05,
"loss": 1.3026,
"step": 20500
},
{
"epoch": 0.17,
"learning_rate": 1.6687540419893685e-05,
"loss": 1.3163,
"step": 21000
},
{
"epoch": 0.17,
"learning_rate": 1.660867233465306e-05,
"loss": 1.3202,
"step": 21500
},
{
"epoch": 0.17,
"learning_rate": 1.6529804249412432e-05,
"loss": 1.2872,
"step": 22000
},
{
"epoch": 0.18,
"learning_rate": 1.6450936164171808e-05,
"loss": 1.3209,
"step": 22500
},
{
"epoch": 0.18,
"learning_rate": 1.637206807893118e-05,
"loss": 1.3165,
"step": 23000
},
{
"epoch": 0.19,
"learning_rate": 1.6293199993690555e-05,
"loss": 1.3026,
"step": 23500
},
{
"epoch": 0.19,
"learning_rate": 1.6214331908449927e-05,
"loss": 1.3313,
"step": 24000
},
{
"epoch": 0.19,
"learning_rate": 1.6135463823209302e-05,
"loss": 1.3123,
"step": 24500
},
{
"epoch": 0.2,
"learning_rate": 1.6056595737968674e-05,
"loss": 1.3124,
"step": 25000
},
{
"epoch": 0.2,
"learning_rate": 1.5977727652728046e-05,
"loss": 1.3227,
"step": 25500
},
{
"epoch": 0.21,
"learning_rate": 1.589885956748742e-05,
"loss": 1.316,
"step": 26000
},
{
"epoch": 0.21,
"learning_rate": 1.5819991482246794e-05,
"loss": 1.2764,
"step": 26500
},
{
"epoch": 0.21,
"learning_rate": 1.574112339700617e-05,
"loss": 1.3096,
"step": 27000
},
{
"epoch": 0.22,
"learning_rate": 1.566225531176554e-05,
"loss": 1.3499,
"step": 27500
},
{
"epoch": 0.22,
"learning_rate": 1.5583387226524916e-05,
"loss": 1.3339,
"step": 28000
},
{
"epoch": 0.22,
"learning_rate": 1.5504519141284288e-05,
"loss": 1.3423,
"step": 28500
},
{
"epoch": 0.23,
"learning_rate": 1.5425651056043664e-05,
"loss": 1.3295,
"step": 29000
},
{
"epoch": 0.23,
"learning_rate": 1.5346782970803036e-05,
"loss": 1.3226,
"step": 29500
},
{
"epoch": 0.24,
"learning_rate": 1.526791488556241e-05,
"loss": 1.2861,
"step": 30000
},
{
"epoch": 0.24,
"learning_rate": 1.5189046800321783e-05,
"loss": 1.3504,
"step": 30500
},
{
"epoch": 0.24,
"learning_rate": 1.5110178715081157e-05,
"loss": 1.3619,
"step": 31000
},
{
"epoch": 0.25,
"learning_rate": 1.503131062984053e-05,
"loss": 1.329,
"step": 31500
},
{
"epoch": 0.25,
"learning_rate": 1.4952442544599904e-05,
"loss": 1.2826,
"step": 32000
},
{
"epoch": 0.26,
"learning_rate": 1.4873574459359277e-05,
"loss": 1.3075,
"step": 32500
},
{
"epoch": 0.26,
"learning_rate": 1.4794706374118651e-05,
"loss": 1.3293,
"step": 33000
},
{
"epoch": 0.26,
"learning_rate": 1.4715838288878023e-05,
"loss": 1.2638,
"step": 33500
},
{
"epoch": 0.27,
"learning_rate": 1.4636970203637397e-05,
"loss": 1.3282,
"step": 34000
},
{
"epoch": 0.27,
"learning_rate": 1.455810211839677e-05,
"loss": 1.3562,
"step": 34500
},
{
"epoch": 0.28,
"learning_rate": 1.4479234033156144e-05,
"loss": 1.3446,
"step": 35000
},
{
"epoch": 0.28,
"learning_rate": 1.4400365947915518e-05,
"loss": 1.3339,
"step": 35500
},
{
"epoch": 0.28,
"learning_rate": 1.4321497862674891e-05,
"loss": 1.31,
"step": 36000
},
{
"epoch": 0.29,
"learning_rate": 1.4242629777434265e-05,
"loss": 1.3224,
"step": 36500
},
{
"epoch": 0.29,
"learning_rate": 1.4163761692193639e-05,
"loss": 1.3715,
"step": 37000
},
{
"epoch": 0.3,
"learning_rate": 1.4084893606953012e-05,
"loss": 1.3081,
"step": 37500
},
{
"epoch": 0.3,
"learning_rate": 1.4006025521712386e-05,
"loss": 1.2892,
"step": 38000
},
{
"epoch": 0.3,
"learning_rate": 1.3927157436471758e-05,
"loss": 1.3046,
"step": 38500
},
{
"epoch": 0.31,
"learning_rate": 1.3848289351231132e-05,
"loss": 1.3344,
"step": 39000
},
{
"epoch": 0.31,
"learning_rate": 1.3769421265990505e-05,
"loss": 1.3303,
"step": 39500
},
{
"epoch": 0.32,
"learning_rate": 1.3690553180749879e-05,
"loss": 1.3237,
"step": 40000
},
{
"epoch": 0.32,
"learning_rate": 1.3611685095509253e-05,
"loss": 1.3162,
"step": 40500
},
{
"epoch": 0.32,
"learning_rate": 1.3532817010268626e-05,
"loss": 1.3496,
"step": 41000
},
{
"epoch": 0.33,
"learning_rate": 1.3453948925028e-05,
"loss": 1.355,
"step": 41500
},
{
"epoch": 0.33,
"learning_rate": 1.3375080839787374e-05,
"loss": 1.348,
"step": 42000
},
{
"epoch": 0.34,
"learning_rate": 1.3296212754546747e-05,
"loss": 1.344,
"step": 42500
},
{
"epoch": 0.34,
"learning_rate": 1.3217344669306121e-05,
"loss": 1.3256,
"step": 43000
},
{
"epoch": 0.34,
"learning_rate": 1.3138476584065495e-05,
"loss": 1.3724,
"step": 43500
},
{
"epoch": 0.35,
"learning_rate": 1.3059608498824867e-05,
"loss": 1.2848,
"step": 44000
},
{
"epoch": 0.35,
"learning_rate": 1.298074041358424e-05,
"loss": 1.3318,
"step": 44500
},
{
"epoch": 0.35,
"learning_rate": 1.2901872328343614e-05,
"loss": 1.3025,
"step": 45000
},
{
"epoch": 0.36,
"learning_rate": 1.2823004243102987e-05,
"loss": 1.2953,
"step": 45500
},
{
"epoch": 0.36,
"learning_rate": 1.2744136157862361e-05,
"loss": 1.3388,
"step": 46000
},
{
"epoch": 0.37,
"learning_rate": 1.2665268072621735e-05,
"loss": 1.3444,
"step": 46500
},
{
"epoch": 0.37,
"learning_rate": 1.2586399987381108e-05,
"loss": 1.3239,
"step": 47000
},
{
"epoch": 0.37,
"learning_rate": 1.2507531902140482e-05,
"loss": 1.296,
"step": 47500
},
{
"epoch": 0.38,
"learning_rate": 1.2428663816899856e-05,
"loss": 1.3024,
"step": 48000
},
{
"epoch": 0.38,
"learning_rate": 1.234979573165923e-05,
"loss": 1.33,
"step": 48500
},
{
"epoch": 0.39,
"learning_rate": 1.2270927646418603e-05,
"loss": 1.3103,
"step": 49000
},
{
"epoch": 0.39,
"learning_rate": 1.2192059561177975e-05,
"loss": 1.3585,
"step": 49500
},
{
"epoch": 0.39,
"learning_rate": 1.2113191475937349e-05,
"loss": 1.3155,
"step": 50000
},
{
"epoch": 0.4,
"learning_rate": 1.2034323390696722e-05,
"loss": 1.3307,
"step": 50500
},
{
"epoch": 0.4,
"learning_rate": 1.1955455305456094e-05,
"loss": 1.3344,
"step": 51000
},
{
"epoch": 0.41,
"learning_rate": 1.1876587220215468e-05,
"loss": 1.3154,
"step": 51500
},
{
"epoch": 0.41,
"learning_rate": 1.1797719134974842e-05,
"loss": 1.3233,
"step": 52000
},
{
"epoch": 0.41,
"learning_rate": 1.1718851049734215e-05,
"loss": 1.2995,
"step": 52500
},
{
"epoch": 0.42,
"learning_rate": 1.1639982964493587e-05,
"loss": 1.3273,
"step": 53000
},
{
"epoch": 0.42,
"learning_rate": 1.1561114879252961e-05,
"loss": 1.3384,
"step": 53500
},
{
"epoch": 0.43,
"learning_rate": 1.1482246794012335e-05,
"loss": 1.3127,
"step": 54000
},
{
"epoch": 0.43,
"learning_rate": 1.1403378708771708e-05,
"loss": 1.3681,
"step": 54500
},
{
"epoch": 0.43,
"learning_rate": 1.1324510623531082e-05,
"loss": 1.3499,
"step": 55000
},
{
"epoch": 0.44,
"learning_rate": 1.1245642538290456e-05,
"loss": 1.3132,
"step": 55500
},
{
"epoch": 0.44,
"learning_rate": 1.116677445304983e-05,
"loss": 1.3173,
"step": 56000
},
{
"epoch": 0.45,
"learning_rate": 1.1087906367809203e-05,
"loss": 1.3366,
"step": 56500
},
{
"epoch": 0.45,
"learning_rate": 1.1009038282568577e-05,
"loss": 1.3433,
"step": 57000
},
{
"epoch": 0.45,
"learning_rate": 1.093017019732795e-05,
"loss": 1.3168,
"step": 57500
},
{
"epoch": 0.46,
"learning_rate": 1.0851302112087324e-05,
"loss": 1.3489,
"step": 58000
},
{
"epoch": 0.46,
"learning_rate": 1.0772434026846696e-05,
"loss": 1.3441,
"step": 58500
},
{
"epoch": 0.47,
"learning_rate": 1.069356594160607e-05,
"loss": 1.2982,
"step": 59000
},
{
"epoch": 0.47,
"learning_rate": 1.0614697856365443e-05,
"loss": 1.3138,
"step": 59500
},
{
"epoch": 0.47,
"learning_rate": 1.0535829771124817e-05,
"loss": 1.3417,
"step": 60000
},
{
"epoch": 0.48,
"learning_rate": 1.045696168588419e-05,
"loss": 1.3107,
"step": 60500
},
{
"epoch": 0.48,
"learning_rate": 1.0378093600643564e-05,
"loss": 1.3524,
"step": 61000
},
{
"epoch": 0.49,
"learning_rate": 1.0299225515402938e-05,
"loss": 1.3509,
"step": 61500
},
{
"epoch": 0.49,
"learning_rate": 1.0220357430162311e-05,
"loss": 1.3469,
"step": 62000
},
{
"epoch": 0.49,
"learning_rate": 1.0141489344921685e-05,
"loss": 1.3231,
"step": 62500
},
{
"epoch": 0.5,
"learning_rate": 1.0062621259681059e-05,
"loss": 1.3218,
"step": 63000
},
{
"epoch": 0.5,
"learning_rate": 9.98375317444043e-06,
"loss": 1.3203,
"step": 63500
},
{
"epoch": 0.5,
"learning_rate": 9.904885089199804e-06,
"loss": 1.286,
"step": 64000
},
{
"epoch": 0.51,
"learning_rate": 9.826017003959178e-06,
"loss": 1.2927,
"step": 64500
},
{
"epoch": 0.51,
"learning_rate": 9.747148918718552e-06,
"loss": 1.3631,
"step": 65000
},
{
"epoch": 0.52,
"learning_rate": 9.668280833477925e-06,
"loss": 1.3556,
"step": 65500
},
{
"epoch": 0.52,
"learning_rate": 9.589412748237299e-06,
"loss": 1.3153,
"step": 66000
},
{
"epoch": 0.52,
"learning_rate": 9.510544662996673e-06,
"loss": 1.3567,
"step": 66500
},
{
"epoch": 0.53,
"learning_rate": 9.431676577756046e-06,
"loss": 1.2935,
"step": 67000
},
{
"epoch": 0.53,
"learning_rate": 9.35280849251542e-06,
"loss": 1.3497,
"step": 67500
},
{
"epoch": 0.54,
"learning_rate": 9.273940407274794e-06,
"loss": 1.3418,
"step": 68000
},
{
"epoch": 0.54,
"learning_rate": 9.195072322034167e-06,
"loss": 1.2812,
"step": 68500
},
{
"epoch": 0.54,
"learning_rate": 9.11620423679354e-06,
"loss": 1.3112,
"step": 69000
},
{
"epoch": 0.55,
"learning_rate": 9.037336151552913e-06,
"loss": 1.3378,
"step": 69500
},
{
"epoch": 0.55,
"learning_rate": 8.958468066312287e-06,
"loss": 1.3173,
"step": 70000
},
{
"epoch": 0.56,
"learning_rate": 8.87959998107166e-06,
"loss": 1.3013,
"step": 70500
},
{
"epoch": 0.56,
"learning_rate": 8.800731895831034e-06,
"loss": 1.3323,
"step": 71000
},
{
"epoch": 0.56,
"learning_rate": 8.721863810590407e-06,
"loss": 1.2787,
"step": 71500
},
{
"epoch": 0.57,
"learning_rate": 8.642995725349781e-06,
"loss": 1.3173,
"step": 72000
},
{
"epoch": 0.57,
"learning_rate": 8.564127640109155e-06,
"loss": 1.3196,
"step": 72500
},
{
"epoch": 0.58,
"learning_rate": 8.485259554868528e-06,
"loss": 1.2903,
"step": 73000
},
{
"epoch": 0.58,
"learning_rate": 8.406391469627902e-06,
"loss": 1.3292,
"step": 73500
},
{
"epoch": 0.58,
"learning_rate": 8.327523384387276e-06,
"loss": 1.3402,
"step": 74000
},
{
"epoch": 0.59,
"learning_rate": 8.248655299146648e-06,
"loss": 1.3504,
"step": 74500
},
{
"epoch": 0.59,
"learning_rate": 8.169787213906021e-06,
"loss": 1.318,
"step": 75000
},
{
"epoch": 0.6,
"learning_rate": 8.090919128665395e-06,
"loss": 1.3544,
"step": 75500
},
{
"epoch": 0.6,
"learning_rate": 8.012051043424769e-06,
"loss": 1.2946,
"step": 76000
},
{
"epoch": 0.6,
"learning_rate": 7.93318295818414e-06,
"loss": 1.357,
"step": 76500
},
{
"epoch": 0.61,
"learning_rate": 7.854314872943514e-06,
"loss": 1.3001,
"step": 77000
},
{
"epoch": 0.61,
"learning_rate": 7.775446787702888e-06,
"loss": 1.3008,
"step": 77500
},
{
"epoch": 0.62,
"learning_rate": 7.696578702462262e-06,
"loss": 1.3285,
"step": 78000
},
{
"epoch": 0.62,
"learning_rate": 7.617710617221635e-06,
"loss": 1.3149,
"step": 78500
},
{
"epoch": 0.62,
"learning_rate": 7.538842531981009e-06,
"loss": 1.2988,
"step": 79000
},
{
"epoch": 0.63,
"learning_rate": 7.459974446740383e-06,
"loss": 1.3254,
"step": 79500
},
{
"epoch": 0.63,
"learning_rate": 7.381106361499755e-06,
"loss": 1.3198,
"step": 80000
},
{
"epoch": 0.63,
"learning_rate": 7.302238276259129e-06,
"loss": 1.2625,
"step": 80500
},
{
"epoch": 0.64,
"learning_rate": 7.223370191018503e-06,
"loss": 1.3755,
"step": 81000
},
{
"epoch": 0.64,
"learning_rate": 7.144502105777876e-06,
"loss": 1.3148,
"step": 81500
},
{
"epoch": 0.65,
"learning_rate": 7.06563402053725e-06,
"loss": 1.2799,
"step": 82000
},
{
"epoch": 0.65,
"learning_rate": 6.986765935296624e-06,
"loss": 1.3211,
"step": 82500
},
{
"epoch": 0.65,
"learning_rate": 6.9078978500559965e-06,
"loss": 1.3325,
"step": 83000
},
{
"epoch": 0.66,
"learning_rate": 6.82902976481537e-06,
"loss": 1.2948,
"step": 83500
},
{
"epoch": 0.66,
"learning_rate": 6.750161679574744e-06,
"loss": 1.314,
"step": 84000
},
{
"epoch": 0.67,
"learning_rate": 6.6712935943341175e-06,
"loss": 1.2739,
"step": 84500
},
{
"epoch": 0.67,
"learning_rate": 6.592425509093491e-06,
"loss": 1.3231,
"step": 85000
},
{
"epoch": 0.67,
"learning_rate": 6.513557423852864e-06,
"loss": 1.3044,
"step": 85500
},
{
"epoch": 0.68,
"learning_rate": 6.434689338612238e-06,
"loss": 1.3177,
"step": 86000
},
{
"epoch": 0.68,
"learning_rate": 6.355821253371611e-06,
"loss": 1.3457,
"step": 86500
},
{
"epoch": 0.69,
"learning_rate": 6.276953168130985e-06,
"loss": 1.3338,
"step": 87000
},
{
"epoch": 0.69,
"learning_rate": 6.1980850828903586e-06,
"loss": 1.3145,
"step": 87500
},
{
"epoch": 0.69,
"learning_rate": 6.119216997649731e-06,
"loss": 1.2863,
"step": 88000
},
{
"epoch": 0.7,
"learning_rate": 6.040348912409105e-06,
"loss": 1.3334,
"step": 88500
},
{
"epoch": 0.7,
"learning_rate": 5.961480827168479e-06,
"loss": 1.2809,
"step": 89000
},
{
"epoch": 0.71,
"learning_rate": 5.882612741927852e-06,
"loss": 1.3164,
"step": 89500
},
{
"epoch": 0.71,
"learning_rate": 5.803744656687226e-06,
"loss": 1.2591,
"step": 90000
},
{
"epoch": 0.71,
"learning_rate": 5.7248765714466e-06,
"loss": 1.3127,
"step": 90500
},
{
"epoch": 0.72,
"learning_rate": 5.6460084862059725e-06,
"loss": 1.2838,
"step": 91000
},
{
"epoch": 0.72,
"learning_rate": 5.567140400965346e-06,
"loss": 1.2783,
"step": 91500
},
{
"epoch": 0.73,
"learning_rate": 5.48827231572472e-06,
"loss": 1.2546,
"step": 92000
},
{
"epoch": 0.73,
"learning_rate": 5.4094042304840934e-06,
"loss": 1.3429,
"step": 92500
},
{
"epoch": 0.73,
"learning_rate": 5.330536145243467e-06,
"loss": 1.3223,
"step": 93000
},
{
"epoch": 0.74,
"learning_rate": 5.251668060002839e-06,
"loss": 1.2925,
"step": 93500
},
{
"epoch": 0.74,
"learning_rate": 5.172799974762213e-06,
"loss": 1.2802,
"step": 94000
},
{
"epoch": 0.75,
"learning_rate": 5.093931889521586e-06,
"loss": 1.2491,
"step": 94500
},
{
"epoch": 0.75,
"learning_rate": 5.01506380428096e-06,
"loss": 1.3079,
"step": 95000
},
{
"epoch": 0.75,
"learning_rate": 4.936195719040334e-06,
"loss": 1.2788,
"step": 95500
},
{
"epoch": 0.76,
"learning_rate": 4.857327633799707e-06,
"loss": 1.2728,
"step": 96000
},
{
"epoch": 0.76,
"learning_rate": 4.778459548559081e-06,
"loss": 1.3208,
"step": 96500
},
{
"epoch": 0.77,
"learning_rate": 4.699591463318454e-06,
"loss": 1.3279,
"step": 97000
},
{
"epoch": 0.77,
"learning_rate": 4.6207233780778275e-06,
"loss": 1.3523,
"step": 97500
},
{
"epoch": 0.77,
"learning_rate": 4.5418552928372e-06,
"loss": 1.3168,
"step": 98000
},
{
"epoch": 0.78,
"learning_rate": 4.462987207596574e-06,
"loss": 1.2678,
"step": 98500
},
{
"epoch": 0.78,
"learning_rate": 4.384119122355948e-06,
"loss": 1.2477,
"step": 99000
},
{
"epoch": 0.78,
"learning_rate": 4.305251037115321e-06,
"loss": 1.3298,
"step": 99500
},
{
"epoch": 0.79,
"learning_rate": 4.226382951874695e-06,
"loss": 1.2886,
"step": 100000
},
{
"epoch": 0.79,
"learning_rate": 4.147514866634068e-06,
"loss": 1.3194,
"step": 100500
},
{
"epoch": 0.8,
"learning_rate": 4.068646781393441e-06,
"loss": 1.3336,
"step": 101000
},
{
"epoch": 0.8,
"learning_rate": 3.989778696152815e-06,
"loss": 1.28,
"step": 101500
},
{
"epoch": 0.8,
"learning_rate": 3.910910610912189e-06,
"loss": 1.2774,
"step": 102000
},
{
"epoch": 0.81,
"learning_rate": 3.832042525671562e-06,
"loss": 1.3041,
"step": 102500
},
{
"epoch": 0.81,
"learning_rate": 3.7531744404309356e-06,
"loss": 1.3761,
"step": 103000
},
{
"epoch": 0.82,
"learning_rate": 3.6743063551903092e-06,
"loss": 1.37,
"step": 103500
},
{
"epoch": 0.82,
"learning_rate": 3.5954382699496825e-06,
"loss": 1.3055,
"step": 104000
},
{
"epoch": 0.82,
"learning_rate": 3.516570184709056e-06,
"loss": 1.3361,
"step": 104500
},
{
"epoch": 0.83,
"learning_rate": 3.4377020994684298e-06,
"loss": 1.3031,
"step": 105000
},
{
"epoch": 0.83,
"learning_rate": 3.358834014227803e-06,
"loss": 1.3137,
"step": 105500
},
{
"epoch": 0.84,
"learning_rate": 3.2799659289871767e-06,
"loss": 1.2919,
"step": 106000
},
{
"epoch": 0.84,
"learning_rate": 3.2010978437465495e-06,
"loss": 1.2852,
"step": 106500
},
{
"epoch": 0.84,
"learning_rate": 3.122229758505923e-06,
"loss": 1.2751,
"step": 107000
},
{
"epoch": 0.85,
"learning_rate": 3.0433616732652964e-06,
"loss": 1.2906,
"step": 107500
},
{
"epoch": 0.85,
"learning_rate": 2.96449358802467e-06,
"loss": 1.3119,
"step": 108000
},
{
"epoch": 0.86,
"learning_rate": 2.8856255027840437e-06,
"loss": 1.2621,
"step": 108500
},
{
"epoch": 0.86,
"learning_rate": 2.806757417543417e-06,
"loss": 1.2994,
"step": 109000
},
{
"epoch": 0.86,
"learning_rate": 2.7278893323027906e-06,
"loss": 1.2909,
"step": 109500
},
{
"epoch": 0.87,
"learning_rate": 2.649021247062164e-06,
"loss": 1.3306,
"step": 110000
},
{
"epoch": 0.87,
"learning_rate": 2.5701531618215375e-06,
"loss": 1.2881,
"step": 110500
},
{
"epoch": 0.88,
"learning_rate": 2.491285076580911e-06,
"loss": 1.306,
"step": 111000
},
{
"epoch": 0.88,
"learning_rate": 2.4124169913402844e-06,
"loss": 1.3357,
"step": 111500
},
{
"epoch": 0.88,
"learning_rate": 2.333548906099658e-06,
"loss": 1.3152,
"step": 112000
},
{
"epoch": 0.89,
"learning_rate": 2.2546808208590317e-06,
"loss": 1.3011,
"step": 112500
},
{
"epoch": 0.89,
"learning_rate": 2.175812735618405e-06,
"loss": 1.2949,
"step": 113000
},
{
"epoch": 0.9,
"learning_rate": 2.096944650377778e-06,
"loss": 1.2997,
"step": 113500
},
{
"epoch": 0.9,
"learning_rate": 2.018076565137152e-06,
"loss": 1.3148,
"step": 114000
},
{
"epoch": 0.9,
"learning_rate": 1.939208479896525e-06,
"loss": 1.3464,
"step": 114500
},
{
"epoch": 0.91,
"learning_rate": 1.8603403946558987e-06,
"loss": 1.2722,
"step": 115000
},
{
"epoch": 0.91,
"learning_rate": 1.7814723094152721e-06,
"loss": 1.2361,
"step": 115500
},
{
"epoch": 0.91,
"learning_rate": 1.7026042241746458e-06,
"loss": 1.2932,
"step": 116000
},
{
"epoch": 0.92,
"learning_rate": 1.6237361389340192e-06,
"loss": 1.2929,
"step": 116500
},
{
"epoch": 0.92,
"learning_rate": 1.5448680536933925e-06,
"loss": 1.2514,
"step": 117000
},
{
"epoch": 0.93,
"learning_rate": 1.465999968452766e-06,
"loss": 1.3133,
"step": 117500
},
{
"epoch": 0.93,
"learning_rate": 1.3871318832121394e-06,
"loss": 1.2766,
"step": 118000
},
{
"epoch": 0.93,
"learning_rate": 1.308263797971513e-06,
"loss": 1.2892,
"step": 118500
},
{
"epoch": 0.94,
"learning_rate": 1.2293957127308865e-06,
"loss": 1.3185,
"step": 119000
},
{
"epoch": 0.94,
"learning_rate": 1.15052762749026e-06,
"loss": 1.299,
"step": 119500
},
{
"epoch": 0.95,
"learning_rate": 1.0716595422496333e-06,
"loss": 1.2878,
"step": 120000
},
{
"epoch": 0.95,
"learning_rate": 9.927914570090068e-07,
"loss": 1.2827,
"step": 120500
},
{
"epoch": 0.95,
"learning_rate": 9.139233717683802e-07,
"loss": 1.3103,
"step": 121000
},
{
"epoch": 0.96,
"learning_rate": 8.350552865277538e-07,
"loss": 1.3186,
"step": 121500
},
{
"epoch": 0.96,
"learning_rate": 7.561872012871272e-07,
"loss": 1.3046,
"step": 122000
},
{
"epoch": 0.97,
"learning_rate": 6.773191160465007e-07,
"loss": 1.2945,
"step": 122500
},
{
"epoch": 0.97,
"learning_rate": 5.984510308058741e-07,
"loss": 1.3193,
"step": 123000
},
{
"epoch": 0.97,
"learning_rate": 5.195829455652477e-07,
"loss": 1.3048,
"step": 123500
},
{
"epoch": 0.98,
"learning_rate": 4.4071486032462107e-07,
"loss": 1.2599,
"step": 124000
},
{
"epoch": 0.98,
"learning_rate": 3.6184677508399457e-07,
"loss": 1.2585,
"step": 124500
},
{
"epoch": 0.99,
"learning_rate": 2.82978689843368e-07,
"loss": 1.2506,
"step": 125000
},
{
"epoch": 0.99,
"learning_rate": 2.0411060460274149e-07,
"loss": 1.2928,
"step": 125500
},
{
"epoch": 0.99,
"learning_rate": 1.2524251936211493e-07,
"loss": 1.2959,
"step": 126000
},
{
"epoch": 1.0,
"learning_rate": 4.63744341214884e-08,
"loss": 1.2957,
"step": 126500
}
],
"max_steps": 126794,
"num_train_epochs": 1,
"total_flos": 4757862742622208.0,
"trial_name": null,
"trial_params": null
}