{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9976812782939256, "global_step": 126500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9921131914759374e-05, "loss": 1.2922, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.984226382951875e-05, "loss": 1.3127, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.976339574427812e-05, "loss": 1.293, "step": 1500 }, { "epoch": 0.02, "learning_rate": 1.9684527659037496e-05, "loss": 1.2856, "step": 2000 }, { "epoch": 0.02, "learning_rate": 1.9605659573796868e-05, "loss": 1.3064, "step": 2500 }, { "epoch": 0.02, "learning_rate": 1.9526791488556244e-05, "loss": 1.3068, "step": 3000 }, { "epoch": 0.03, "learning_rate": 1.9447923403315616e-05, "loss": 1.2982, "step": 3500 }, { "epoch": 0.03, "learning_rate": 1.936905531807499e-05, "loss": 1.303, "step": 4000 }, { "epoch": 0.04, "learning_rate": 1.9290187232834363e-05, "loss": 1.3007, "step": 4500 }, { "epoch": 0.04, "learning_rate": 1.9211319147593738e-05, "loss": 1.3172, "step": 5000 }, { "epoch": 0.04, "learning_rate": 1.913245106235311e-05, "loss": 1.325, "step": 5500 }, { "epoch": 0.05, "learning_rate": 1.9053582977112482e-05, "loss": 1.2849, "step": 6000 }, { "epoch": 0.05, "learning_rate": 1.8974714891871857e-05, "loss": 1.3189, "step": 6500 }, { "epoch": 0.06, "learning_rate": 1.889584680663123e-05, "loss": 1.2955, "step": 7000 }, { "epoch": 0.06, "learning_rate": 1.8816978721390605e-05, "loss": 1.3496, "step": 7500 }, { "epoch": 0.06, "learning_rate": 1.8738110636149977e-05, "loss": 1.3103, "step": 8000 }, { "epoch": 0.07, "learning_rate": 1.8659242550909352e-05, "loss": 1.2921, "step": 8500 }, { "epoch": 0.07, "learning_rate": 1.8580374465668724e-05, "loss": 1.303, "step": 9000 }, { "epoch": 0.07, "learning_rate": 1.85015063804281e-05, "loss": 1.2997, "step": 9500 }, { "epoch": 0.08, "learning_rate": 1.842263829518747e-05, "loss": 1.2692, "step": 10000 }, { "epoch": 0.08, "learning_rate": 1.8343770209946847e-05, "loss": 1.2915, "step": 10500 }, { "epoch": 0.09, "learning_rate": 1.826490212470622e-05, "loss": 1.2907, "step": 11000 }, { "epoch": 0.09, "learning_rate": 1.818603403946559e-05, "loss": 1.3233, "step": 11500 }, { "epoch": 0.09, "learning_rate": 1.8107165954224966e-05, "loss": 1.3047, "step": 12000 }, { "epoch": 0.1, "learning_rate": 1.8028297868984338e-05, "loss": 1.273, "step": 12500 }, { "epoch": 0.1, "learning_rate": 1.7949429783743713e-05, "loss": 1.3118, "step": 13000 }, { "epoch": 0.11, "learning_rate": 1.7870561698503085e-05, "loss": 1.3183, "step": 13500 }, { "epoch": 0.11, "learning_rate": 1.779169361326246e-05, "loss": 1.304, "step": 14000 }, { "epoch": 0.11, "learning_rate": 1.7712825528021833e-05, "loss": 1.3409, "step": 14500 }, { "epoch": 0.12, "learning_rate": 1.7633957442781208e-05, "loss": 1.3068, "step": 15000 }, { "epoch": 0.12, "learning_rate": 1.755508935754058e-05, "loss": 1.3038, "step": 15500 }, { "epoch": 0.13, "learning_rate": 1.7476221272299955e-05, "loss": 1.3331, "step": 16000 }, { "epoch": 0.13, "learning_rate": 1.7397353187059327e-05, "loss": 1.2997, "step": 16500 }, { "epoch": 0.13, "learning_rate": 1.73184851018187e-05, "loss": 1.2833, "step": 17000 }, { "epoch": 0.14, "learning_rate": 1.723961701657807e-05, "loss": 1.3178, "step": 17500 }, { "epoch": 0.14, "learning_rate": 1.7160748931337447e-05, "loss": 1.3263, "step": 18000 }, { "epoch": 0.15, "learning_rate": 1.708188084609682e-05, "loss": 1.2907, "step": 18500 }, { "epoch": 0.15, "learning_rate": 1.7003012760856194e-05, "loss": 1.3328, "step": 19000 }, { "epoch": 0.15, "learning_rate": 1.6924144675615566e-05, "loss": 1.3268, "step": 19500 }, { "epoch": 0.16, "learning_rate": 1.6845276590374938e-05, "loss": 1.3161, "step": 20000 }, { "epoch": 0.16, "learning_rate": 1.6766408505134313e-05, "loss": 1.3026, "step": 20500 }, { "epoch": 0.17, "learning_rate": 1.6687540419893685e-05, "loss": 1.3163, "step": 21000 }, { "epoch": 0.17, "learning_rate": 1.660867233465306e-05, "loss": 1.3202, "step": 21500 }, { "epoch": 0.17, "learning_rate": 1.6529804249412432e-05, "loss": 1.2872, "step": 22000 }, { "epoch": 0.18, "learning_rate": 1.6450936164171808e-05, "loss": 1.3209, "step": 22500 }, { "epoch": 0.18, "learning_rate": 1.637206807893118e-05, "loss": 1.3165, "step": 23000 }, { "epoch": 0.19, "learning_rate": 1.6293199993690555e-05, "loss": 1.3026, "step": 23500 }, { "epoch": 0.19, "learning_rate": 1.6214331908449927e-05, "loss": 1.3313, "step": 24000 }, { "epoch": 0.19, "learning_rate": 1.6135463823209302e-05, "loss": 1.3123, "step": 24500 }, { "epoch": 0.2, "learning_rate": 1.6056595737968674e-05, "loss": 1.3124, "step": 25000 }, { "epoch": 0.2, "learning_rate": 1.5977727652728046e-05, "loss": 1.3227, "step": 25500 }, { "epoch": 0.21, "learning_rate": 1.589885956748742e-05, "loss": 1.316, "step": 26000 }, { "epoch": 0.21, "learning_rate": 1.5819991482246794e-05, "loss": 1.2764, "step": 26500 }, { "epoch": 0.21, "learning_rate": 1.574112339700617e-05, "loss": 1.3096, "step": 27000 }, { "epoch": 0.22, "learning_rate": 1.566225531176554e-05, "loss": 1.3499, "step": 27500 }, { "epoch": 0.22, "learning_rate": 1.5583387226524916e-05, "loss": 1.3339, "step": 28000 }, { "epoch": 0.22, "learning_rate": 1.5504519141284288e-05, "loss": 1.3423, "step": 28500 }, { "epoch": 0.23, "learning_rate": 1.5425651056043664e-05, "loss": 1.3295, "step": 29000 }, { "epoch": 0.23, "learning_rate": 1.5346782970803036e-05, "loss": 1.3226, "step": 29500 }, { "epoch": 0.24, "learning_rate": 1.526791488556241e-05, "loss": 1.2861, "step": 30000 }, { "epoch": 0.24, "learning_rate": 1.5189046800321783e-05, "loss": 1.3504, "step": 30500 }, { "epoch": 0.24, "learning_rate": 1.5110178715081157e-05, "loss": 1.3619, "step": 31000 }, { "epoch": 0.25, "learning_rate": 1.503131062984053e-05, "loss": 1.329, "step": 31500 }, { "epoch": 0.25, "learning_rate": 1.4952442544599904e-05, "loss": 1.2826, "step": 32000 }, { "epoch": 0.26, "learning_rate": 1.4873574459359277e-05, "loss": 1.3075, "step": 32500 }, { "epoch": 0.26, "learning_rate": 1.4794706374118651e-05, "loss": 1.3293, "step": 33000 }, { "epoch": 0.26, "learning_rate": 1.4715838288878023e-05, "loss": 1.2638, "step": 33500 }, { "epoch": 0.27, "learning_rate": 1.4636970203637397e-05, "loss": 1.3282, "step": 34000 }, { "epoch": 0.27, "learning_rate": 1.455810211839677e-05, "loss": 1.3562, "step": 34500 }, { "epoch": 0.28, "learning_rate": 1.4479234033156144e-05, "loss": 1.3446, "step": 35000 }, { "epoch": 0.28, "learning_rate": 1.4400365947915518e-05, "loss": 1.3339, "step": 35500 }, { "epoch": 0.28, "learning_rate": 1.4321497862674891e-05, "loss": 1.31, "step": 36000 }, { "epoch": 0.29, "learning_rate": 1.4242629777434265e-05, "loss": 1.3224, "step": 36500 }, { "epoch": 0.29, "learning_rate": 1.4163761692193639e-05, "loss": 1.3715, "step": 37000 }, { "epoch": 0.3, "learning_rate": 1.4084893606953012e-05, "loss": 1.3081, "step": 37500 }, { "epoch": 0.3, "learning_rate": 1.4006025521712386e-05, "loss": 1.2892, "step": 38000 }, { "epoch": 0.3, "learning_rate": 1.3927157436471758e-05, "loss": 1.3046, "step": 38500 }, { "epoch": 0.31, "learning_rate": 1.3848289351231132e-05, "loss": 1.3344, "step": 39000 }, { "epoch": 0.31, "learning_rate": 1.3769421265990505e-05, "loss": 1.3303, "step": 39500 }, { "epoch": 0.32, "learning_rate": 1.3690553180749879e-05, "loss": 1.3237, "step": 40000 }, { "epoch": 0.32, "learning_rate": 1.3611685095509253e-05, "loss": 1.3162, "step": 40500 }, { "epoch": 0.32, "learning_rate": 1.3532817010268626e-05, "loss": 1.3496, "step": 41000 }, { "epoch": 0.33, "learning_rate": 1.3453948925028e-05, "loss": 1.355, "step": 41500 }, { "epoch": 0.33, "learning_rate": 1.3375080839787374e-05, "loss": 1.348, "step": 42000 }, { "epoch": 0.34, "learning_rate": 1.3296212754546747e-05, "loss": 1.344, "step": 42500 }, { "epoch": 0.34, "learning_rate": 1.3217344669306121e-05, "loss": 1.3256, "step": 43000 }, { "epoch": 0.34, "learning_rate": 1.3138476584065495e-05, "loss": 1.3724, "step": 43500 }, { "epoch": 0.35, "learning_rate": 1.3059608498824867e-05, "loss": 1.2848, "step": 44000 }, { "epoch": 0.35, "learning_rate": 1.298074041358424e-05, "loss": 1.3318, "step": 44500 }, { "epoch": 0.35, "learning_rate": 1.2901872328343614e-05, "loss": 1.3025, "step": 45000 }, { "epoch": 0.36, "learning_rate": 1.2823004243102987e-05, "loss": 1.2953, "step": 45500 }, { "epoch": 0.36, "learning_rate": 1.2744136157862361e-05, "loss": 1.3388, "step": 46000 }, { "epoch": 0.37, "learning_rate": 1.2665268072621735e-05, "loss": 1.3444, "step": 46500 }, { "epoch": 0.37, "learning_rate": 1.2586399987381108e-05, "loss": 1.3239, "step": 47000 }, { "epoch": 0.37, "learning_rate": 1.2507531902140482e-05, "loss": 1.296, "step": 47500 }, { "epoch": 0.38, "learning_rate": 1.2428663816899856e-05, "loss": 1.3024, "step": 48000 }, { "epoch": 0.38, "learning_rate": 1.234979573165923e-05, "loss": 1.33, "step": 48500 }, { "epoch": 0.39, "learning_rate": 1.2270927646418603e-05, "loss": 1.3103, "step": 49000 }, { "epoch": 0.39, "learning_rate": 1.2192059561177975e-05, "loss": 1.3585, "step": 49500 }, { "epoch": 0.39, "learning_rate": 1.2113191475937349e-05, "loss": 1.3155, "step": 50000 }, { "epoch": 0.4, "learning_rate": 1.2034323390696722e-05, "loss": 1.3307, "step": 50500 }, { "epoch": 0.4, "learning_rate": 1.1955455305456094e-05, "loss": 1.3344, "step": 51000 }, { "epoch": 0.41, "learning_rate": 1.1876587220215468e-05, "loss": 1.3154, "step": 51500 }, { "epoch": 0.41, "learning_rate": 1.1797719134974842e-05, "loss": 1.3233, "step": 52000 }, { "epoch": 0.41, "learning_rate": 1.1718851049734215e-05, "loss": 1.2995, "step": 52500 }, { "epoch": 0.42, "learning_rate": 1.1639982964493587e-05, "loss": 1.3273, "step": 53000 }, { "epoch": 0.42, "learning_rate": 1.1561114879252961e-05, "loss": 1.3384, "step": 53500 }, { "epoch": 0.43, "learning_rate": 1.1482246794012335e-05, "loss": 1.3127, "step": 54000 }, { "epoch": 0.43, "learning_rate": 1.1403378708771708e-05, "loss": 1.3681, "step": 54500 }, { "epoch": 0.43, "learning_rate": 1.1324510623531082e-05, "loss": 1.3499, "step": 55000 }, { "epoch": 0.44, "learning_rate": 1.1245642538290456e-05, "loss": 1.3132, "step": 55500 }, { "epoch": 0.44, "learning_rate": 1.116677445304983e-05, "loss": 1.3173, "step": 56000 }, { "epoch": 0.45, "learning_rate": 1.1087906367809203e-05, "loss": 1.3366, "step": 56500 }, { "epoch": 0.45, "learning_rate": 1.1009038282568577e-05, "loss": 1.3433, "step": 57000 }, { "epoch": 0.45, "learning_rate": 1.093017019732795e-05, "loss": 1.3168, "step": 57500 }, { "epoch": 0.46, "learning_rate": 1.0851302112087324e-05, "loss": 1.3489, "step": 58000 }, { "epoch": 0.46, "learning_rate": 1.0772434026846696e-05, "loss": 1.3441, "step": 58500 }, { "epoch": 0.47, "learning_rate": 1.069356594160607e-05, "loss": 1.2982, "step": 59000 }, { "epoch": 0.47, "learning_rate": 1.0614697856365443e-05, "loss": 1.3138, "step": 59500 }, { "epoch": 0.47, "learning_rate": 1.0535829771124817e-05, "loss": 1.3417, "step": 60000 }, { "epoch": 0.48, "learning_rate": 1.045696168588419e-05, "loss": 1.3107, "step": 60500 }, { "epoch": 0.48, "learning_rate": 1.0378093600643564e-05, "loss": 1.3524, "step": 61000 }, { "epoch": 0.49, "learning_rate": 1.0299225515402938e-05, "loss": 1.3509, "step": 61500 }, { "epoch": 0.49, "learning_rate": 1.0220357430162311e-05, "loss": 1.3469, "step": 62000 }, { "epoch": 0.49, "learning_rate": 1.0141489344921685e-05, "loss": 1.3231, "step": 62500 }, { "epoch": 0.5, "learning_rate": 1.0062621259681059e-05, "loss": 1.3218, "step": 63000 }, { "epoch": 0.5, "learning_rate": 9.98375317444043e-06, "loss": 1.3203, "step": 63500 }, { "epoch": 0.5, "learning_rate": 9.904885089199804e-06, "loss": 1.286, "step": 64000 }, { "epoch": 0.51, "learning_rate": 9.826017003959178e-06, "loss": 1.2927, "step": 64500 }, { "epoch": 0.51, "learning_rate": 9.747148918718552e-06, "loss": 1.3631, "step": 65000 }, { "epoch": 0.52, "learning_rate": 9.668280833477925e-06, "loss": 1.3556, "step": 65500 }, { "epoch": 0.52, "learning_rate": 9.589412748237299e-06, "loss": 1.3153, "step": 66000 }, { "epoch": 0.52, "learning_rate": 9.510544662996673e-06, "loss": 1.3567, "step": 66500 }, { "epoch": 0.53, "learning_rate": 9.431676577756046e-06, "loss": 1.2935, "step": 67000 }, { "epoch": 0.53, "learning_rate": 9.35280849251542e-06, "loss": 1.3497, "step": 67500 }, { "epoch": 0.54, "learning_rate": 9.273940407274794e-06, "loss": 1.3418, "step": 68000 }, { "epoch": 0.54, "learning_rate": 9.195072322034167e-06, "loss": 1.2812, "step": 68500 }, { "epoch": 0.54, "learning_rate": 9.11620423679354e-06, "loss": 1.3112, "step": 69000 }, { "epoch": 0.55, "learning_rate": 9.037336151552913e-06, "loss": 1.3378, "step": 69500 }, { "epoch": 0.55, "learning_rate": 8.958468066312287e-06, "loss": 1.3173, "step": 70000 }, { "epoch": 0.56, "learning_rate": 8.87959998107166e-06, "loss": 1.3013, "step": 70500 }, { "epoch": 0.56, "learning_rate": 8.800731895831034e-06, "loss": 1.3323, "step": 71000 }, { "epoch": 0.56, "learning_rate": 8.721863810590407e-06, "loss": 1.2787, "step": 71500 }, { "epoch": 0.57, "learning_rate": 8.642995725349781e-06, "loss": 1.3173, "step": 72000 }, { "epoch": 0.57, "learning_rate": 8.564127640109155e-06, "loss": 1.3196, "step": 72500 }, { "epoch": 0.58, "learning_rate": 8.485259554868528e-06, "loss": 1.2903, "step": 73000 }, { "epoch": 0.58, "learning_rate": 8.406391469627902e-06, "loss": 1.3292, "step": 73500 }, { "epoch": 0.58, "learning_rate": 8.327523384387276e-06, "loss": 1.3402, "step": 74000 }, { "epoch": 0.59, "learning_rate": 8.248655299146648e-06, "loss": 1.3504, "step": 74500 }, { "epoch": 0.59, "learning_rate": 8.169787213906021e-06, "loss": 1.318, "step": 75000 }, { "epoch": 0.6, "learning_rate": 8.090919128665395e-06, "loss": 1.3544, "step": 75500 }, { "epoch": 0.6, "learning_rate": 8.012051043424769e-06, "loss": 1.2946, "step": 76000 }, { "epoch": 0.6, "learning_rate": 7.93318295818414e-06, "loss": 1.357, "step": 76500 }, { "epoch": 0.61, "learning_rate": 7.854314872943514e-06, "loss": 1.3001, "step": 77000 }, { "epoch": 0.61, "learning_rate": 7.775446787702888e-06, "loss": 1.3008, "step": 77500 }, { "epoch": 0.62, "learning_rate": 7.696578702462262e-06, "loss": 1.3285, "step": 78000 }, { "epoch": 0.62, "learning_rate": 7.617710617221635e-06, "loss": 1.3149, "step": 78500 }, { "epoch": 0.62, "learning_rate": 7.538842531981009e-06, "loss": 1.2988, "step": 79000 }, { "epoch": 0.63, "learning_rate": 7.459974446740383e-06, "loss": 1.3254, "step": 79500 }, { "epoch": 0.63, "learning_rate": 7.381106361499755e-06, "loss": 1.3198, "step": 80000 }, { "epoch": 0.63, "learning_rate": 7.302238276259129e-06, "loss": 1.2625, "step": 80500 }, { "epoch": 0.64, "learning_rate": 7.223370191018503e-06, "loss": 1.3755, "step": 81000 }, { "epoch": 0.64, "learning_rate": 7.144502105777876e-06, "loss": 1.3148, "step": 81500 }, { "epoch": 0.65, "learning_rate": 7.06563402053725e-06, "loss": 1.2799, "step": 82000 }, { "epoch": 0.65, "learning_rate": 6.986765935296624e-06, "loss": 1.3211, "step": 82500 }, { "epoch": 0.65, "learning_rate": 6.9078978500559965e-06, "loss": 1.3325, "step": 83000 }, { "epoch": 0.66, "learning_rate": 6.82902976481537e-06, "loss": 1.2948, "step": 83500 }, { "epoch": 0.66, "learning_rate": 6.750161679574744e-06, "loss": 1.314, "step": 84000 }, { "epoch": 0.67, "learning_rate": 6.6712935943341175e-06, "loss": 1.2739, "step": 84500 }, { "epoch": 0.67, "learning_rate": 6.592425509093491e-06, "loss": 1.3231, "step": 85000 }, { "epoch": 0.67, "learning_rate": 6.513557423852864e-06, "loss": 1.3044, "step": 85500 }, { "epoch": 0.68, "learning_rate": 6.434689338612238e-06, "loss": 1.3177, "step": 86000 }, { "epoch": 0.68, "learning_rate": 6.355821253371611e-06, "loss": 1.3457, "step": 86500 }, { "epoch": 0.69, "learning_rate": 6.276953168130985e-06, "loss": 1.3338, "step": 87000 }, { "epoch": 0.69, "learning_rate": 6.1980850828903586e-06, "loss": 1.3145, "step": 87500 }, { "epoch": 0.69, "learning_rate": 6.119216997649731e-06, "loss": 1.2863, "step": 88000 }, { "epoch": 0.7, "learning_rate": 6.040348912409105e-06, "loss": 1.3334, "step": 88500 }, { "epoch": 0.7, "learning_rate": 5.961480827168479e-06, "loss": 1.2809, "step": 89000 }, { "epoch": 0.71, "learning_rate": 5.882612741927852e-06, "loss": 1.3164, "step": 89500 }, { "epoch": 0.71, "learning_rate": 5.803744656687226e-06, "loss": 1.2591, "step": 90000 }, { "epoch": 0.71, "learning_rate": 5.7248765714466e-06, "loss": 1.3127, "step": 90500 }, { "epoch": 0.72, "learning_rate": 5.6460084862059725e-06, "loss": 1.2838, "step": 91000 }, { "epoch": 0.72, "learning_rate": 5.567140400965346e-06, "loss": 1.2783, "step": 91500 }, { "epoch": 0.73, "learning_rate": 5.48827231572472e-06, "loss": 1.2546, "step": 92000 }, { "epoch": 0.73, "learning_rate": 5.4094042304840934e-06, "loss": 1.3429, "step": 92500 }, { "epoch": 0.73, "learning_rate": 5.330536145243467e-06, "loss": 1.3223, "step": 93000 }, { "epoch": 0.74, "learning_rate": 5.251668060002839e-06, "loss": 1.2925, "step": 93500 }, { "epoch": 0.74, "learning_rate": 5.172799974762213e-06, "loss": 1.2802, "step": 94000 }, { "epoch": 0.75, "learning_rate": 5.093931889521586e-06, "loss": 1.2491, "step": 94500 }, { "epoch": 0.75, "learning_rate": 5.01506380428096e-06, "loss": 1.3079, "step": 95000 }, { "epoch": 0.75, "learning_rate": 4.936195719040334e-06, "loss": 1.2788, "step": 95500 }, { "epoch": 0.76, "learning_rate": 4.857327633799707e-06, "loss": 1.2728, "step": 96000 }, { "epoch": 0.76, "learning_rate": 4.778459548559081e-06, "loss": 1.3208, "step": 96500 }, { "epoch": 0.77, "learning_rate": 4.699591463318454e-06, "loss": 1.3279, "step": 97000 }, { "epoch": 0.77, "learning_rate": 4.6207233780778275e-06, "loss": 1.3523, "step": 97500 }, { "epoch": 0.77, "learning_rate": 4.5418552928372e-06, "loss": 1.3168, "step": 98000 }, { "epoch": 0.78, "learning_rate": 4.462987207596574e-06, "loss": 1.2678, "step": 98500 }, { "epoch": 0.78, "learning_rate": 4.384119122355948e-06, "loss": 1.2477, "step": 99000 }, { "epoch": 0.78, "learning_rate": 4.305251037115321e-06, "loss": 1.3298, "step": 99500 }, { "epoch": 0.79, "learning_rate": 4.226382951874695e-06, "loss": 1.2886, "step": 100000 }, { "epoch": 0.79, "learning_rate": 4.147514866634068e-06, "loss": 1.3194, "step": 100500 }, { "epoch": 0.8, "learning_rate": 4.068646781393441e-06, "loss": 1.3336, "step": 101000 }, { "epoch": 0.8, "learning_rate": 3.989778696152815e-06, "loss": 1.28, "step": 101500 }, { "epoch": 0.8, "learning_rate": 3.910910610912189e-06, "loss": 1.2774, "step": 102000 }, { "epoch": 0.81, "learning_rate": 3.832042525671562e-06, "loss": 1.3041, "step": 102500 }, { "epoch": 0.81, "learning_rate": 3.7531744404309356e-06, "loss": 1.3761, "step": 103000 }, { "epoch": 0.82, "learning_rate": 3.6743063551903092e-06, "loss": 1.37, "step": 103500 }, { "epoch": 0.82, "learning_rate": 3.5954382699496825e-06, "loss": 1.3055, "step": 104000 }, { "epoch": 0.82, "learning_rate": 3.516570184709056e-06, "loss": 1.3361, "step": 104500 }, { "epoch": 0.83, "learning_rate": 3.4377020994684298e-06, "loss": 1.3031, "step": 105000 }, { "epoch": 0.83, "learning_rate": 3.358834014227803e-06, "loss": 1.3137, "step": 105500 }, { "epoch": 0.84, "learning_rate": 3.2799659289871767e-06, "loss": 1.2919, "step": 106000 }, { "epoch": 0.84, "learning_rate": 3.2010978437465495e-06, "loss": 1.2852, "step": 106500 }, { "epoch": 0.84, "learning_rate": 3.122229758505923e-06, "loss": 1.2751, "step": 107000 }, { "epoch": 0.85, "learning_rate": 3.0433616732652964e-06, "loss": 1.2906, "step": 107500 }, { "epoch": 0.85, "learning_rate": 2.96449358802467e-06, "loss": 1.3119, "step": 108000 }, { "epoch": 0.86, "learning_rate": 2.8856255027840437e-06, "loss": 1.2621, "step": 108500 }, { "epoch": 0.86, "learning_rate": 2.806757417543417e-06, "loss": 1.2994, "step": 109000 }, { "epoch": 0.86, "learning_rate": 2.7278893323027906e-06, "loss": 1.2909, "step": 109500 }, { "epoch": 0.87, "learning_rate": 2.649021247062164e-06, "loss": 1.3306, "step": 110000 }, { "epoch": 0.87, "learning_rate": 2.5701531618215375e-06, "loss": 1.2881, "step": 110500 }, { "epoch": 0.88, "learning_rate": 2.491285076580911e-06, "loss": 1.306, "step": 111000 }, { "epoch": 0.88, "learning_rate": 2.4124169913402844e-06, "loss": 1.3357, "step": 111500 }, { "epoch": 0.88, "learning_rate": 2.333548906099658e-06, "loss": 1.3152, "step": 112000 }, { "epoch": 0.89, "learning_rate": 2.2546808208590317e-06, "loss": 1.3011, "step": 112500 }, { "epoch": 0.89, "learning_rate": 2.175812735618405e-06, "loss": 1.2949, "step": 113000 }, { "epoch": 0.9, "learning_rate": 2.096944650377778e-06, "loss": 1.2997, "step": 113500 }, { "epoch": 0.9, "learning_rate": 2.018076565137152e-06, "loss": 1.3148, "step": 114000 }, { "epoch": 0.9, "learning_rate": 1.939208479896525e-06, "loss": 1.3464, "step": 114500 }, { "epoch": 0.91, "learning_rate": 1.8603403946558987e-06, "loss": 1.2722, "step": 115000 }, { "epoch": 0.91, "learning_rate": 1.7814723094152721e-06, "loss": 1.2361, "step": 115500 }, { "epoch": 0.91, "learning_rate": 1.7026042241746458e-06, "loss": 1.2932, "step": 116000 }, { "epoch": 0.92, "learning_rate": 1.6237361389340192e-06, "loss": 1.2929, "step": 116500 }, { "epoch": 0.92, "learning_rate": 1.5448680536933925e-06, "loss": 1.2514, "step": 117000 }, { "epoch": 0.93, "learning_rate": 1.465999968452766e-06, "loss": 1.3133, "step": 117500 }, { "epoch": 0.93, "learning_rate": 1.3871318832121394e-06, "loss": 1.2766, "step": 118000 }, { "epoch": 0.93, "learning_rate": 1.308263797971513e-06, "loss": 1.2892, "step": 118500 }, { "epoch": 0.94, "learning_rate": 1.2293957127308865e-06, "loss": 1.3185, "step": 119000 }, { "epoch": 0.94, "learning_rate": 1.15052762749026e-06, "loss": 1.299, "step": 119500 }, { "epoch": 0.95, "learning_rate": 1.0716595422496333e-06, "loss": 1.2878, "step": 120000 }, { "epoch": 0.95, "learning_rate": 9.927914570090068e-07, "loss": 1.2827, "step": 120500 }, { "epoch": 0.95, "learning_rate": 9.139233717683802e-07, "loss": 1.3103, "step": 121000 }, { "epoch": 0.96, "learning_rate": 8.350552865277538e-07, "loss": 1.3186, "step": 121500 }, { "epoch": 0.96, "learning_rate": 7.561872012871272e-07, "loss": 1.3046, "step": 122000 }, { "epoch": 0.97, "learning_rate": 6.773191160465007e-07, "loss": 1.2945, "step": 122500 }, { "epoch": 0.97, "learning_rate": 5.984510308058741e-07, "loss": 1.3193, "step": 123000 }, { "epoch": 0.97, "learning_rate": 5.195829455652477e-07, "loss": 1.3048, "step": 123500 }, { "epoch": 0.98, "learning_rate": 4.4071486032462107e-07, "loss": 1.2599, "step": 124000 }, { "epoch": 0.98, "learning_rate": 3.6184677508399457e-07, "loss": 1.2585, "step": 124500 }, { "epoch": 0.99, "learning_rate": 2.82978689843368e-07, "loss": 1.2506, "step": 125000 }, { "epoch": 0.99, "learning_rate": 2.0411060460274149e-07, "loss": 1.2928, "step": 125500 }, { "epoch": 0.99, "learning_rate": 1.2524251936211493e-07, "loss": 1.2959, "step": 126000 }, { "epoch": 1.0, "learning_rate": 4.63744341214884e-08, "loss": 1.2957, "step": 126500 } ], "max_steps": 126794, "num_train_epochs": 1, "total_flos": 4757862742622208.0, "trial_name": null, "trial_params": null }