|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 32850, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.997199391171994e-05, |
|
"loss": 2.9757, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.994155251141553e-05, |
|
"loss": 2.7383, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9911111111111114e-05, |
|
"loss": 2.5865, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.98806697108067e-05, |
|
"loss": 2.6042, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9850228310502285e-05, |
|
"loss": 2.7077, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.982100456621005e-05, |
|
"loss": 2.8069, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.9790563165905637e-05, |
|
"loss": 2.5831, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.976012176560122e-05, |
|
"loss": 2.5526, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.972968036529681e-05, |
|
"loss": 2.5865, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9699238964992394e-05, |
|
"loss": 2.4963, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.966879756468798e-05, |
|
"loss": 2.5408, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.9638356164383565e-05, |
|
"loss": 2.7108, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.960791476407915e-05, |
|
"loss": 2.6353, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.9577473363774736e-05, |
|
"loss": 2.538, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.954703196347032e-05, |
|
"loss": 2.6099, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.951659056316591e-05, |
|
"loss": 2.569, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.948614916286149e-05, |
|
"loss": 2.4251, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.945570776255708e-05, |
|
"loss": 2.4954, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.9425266362252665e-05, |
|
"loss": 2.3763, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.939482496194826e-05, |
|
"loss": 2.5156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.936438356164384e-05, |
|
"loss": 2.4705, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.933394216133942e-05, |
|
"loss": 2.5698, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.930350076103501e-05, |
|
"loss": 2.497, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.92730593607306e-05, |
|
"loss": 2.3479, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.9242617960426186e-05, |
|
"loss": 2.4028, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.921217656012177e-05, |
|
"loss": 2.4164, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.918173515981735e-05, |
|
"loss": 2.5796, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.915129375951294e-05, |
|
"loss": 2.5134, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.912085235920853e-05, |
|
"loss": 2.4413, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.9090410958904114e-05, |
|
"loss": 2.4925, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.90599695585997e-05, |
|
"loss": 2.4394, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9029528158295285e-05, |
|
"loss": 2.4334, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.899908675799087e-05, |
|
"loss": 2.3809, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.896864535768646e-05, |
|
"loss": 2.4713, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.893820395738204e-05, |
|
"loss": 2.3322, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.890776255707763e-05, |
|
"loss": 2.3121, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.8877321156773214e-05, |
|
"loss": 2.3795, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.88468797564688e-05, |
|
"loss": 2.377, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.8816438356164385e-05, |
|
"loss": 2.4454, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.878599695585998e-05, |
|
"loss": 2.4502, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8755555555555556e-05, |
|
"loss": 2.423, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.872511415525114e-05, |
|
"loss": 2.3717, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.869467275494673e-05, |
|
"loss": 2.4881, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.866423135464232e-05, |
|
"loss": 2.3898, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.8633789954337906e-05, |
|
"loss": 2.4626, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.860334855403349e-05, |
|
"loss": 2.3674, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.857290715372907e-05, |
|
"loss": 2.3929, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.854246575342466e-05, |
|
"loss": 2.3222, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.851202435312025e-05, |
|
"loss": 2.453, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8481582952815834e-05, |
|
"loss": 2.4782, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.845114155251142e-05, |
|
"loss": 2.5002, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.8420700152207006e-05, |
|
"loss": 2.3431, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.839025875190259e-05, |
|
"loss": 2.3825, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.835981735159818e-05, |
|
"loss": 2.4681, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.832937595129376e-05, |
|
"loss": 2.4481, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.829893455098935e-05, |
|
"loss": 2.3781, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.8268493150684934e-05, |
|
"loss": 2.309, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.823805175038052e-05, |
|
"loss": 2.4879, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.8207610350076105e-05, |
|
"loss": 2.3736, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.81771689497717e-05, |
|
"loss": 2.5061, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.814672754946728e-05, |
|
"loss": 2.3825, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.811628614916286e-05, |
|
"loss": 2.3851, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.808584474885845e-05, |
|
"loss": 2.3361, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.805540334855404e-05, |
|
"loss": 2.4237, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.8024961948249626e-05, |
|
"loss": 2.3025, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.7994520547945205e-05, |
|
"loss": 2.3862, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.796407914764079e-05, |
|
"loss": 2.3268, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.793363774733638e-05, |
|
"loss": 2.3134, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.790319634703197e-05, |
|
"loss": 2.3043, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.7872754946727555e-05, |
|
"loss": 2.2921, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.784231354642314e-05, |
|
"loss": 2.3658, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 3.7811872146118726e-05, |
|
"loss": 2.3773, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.778143074581431e-05, |
|
"loss": 2.3391, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.77509893455099e-05, |
|
"loss": 2.3664, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.772054794520548e-05, |
|
"loss": 2.3339, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.769010654490107e-05, |
|
"loss": 2.2858, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.7659665144596654e-05, |
|
"loss": 2.344, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.762922374429224e-05, |
|
"loss": 2.2802, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.7598782343987826e-05, |
|
"loss": 2.2779, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.756834094368341e-05, |
|
"loss": 2.3558, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7537899543379e-05, |
|
"loss": 2.3082, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.750745814307458e-05, |
|
"loss": 2.3199, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.747701674277017e-05, |
|
"loss": 2.4683, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.744657534246576e-05, |
|
"loss": 2.4058, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.741613394216135e-05, |
|
"loss": 2.3419, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7385692541856926e-05, |
|
"loss": 2.2594, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.735525114155251e-05, |
|
"loss": 2.224, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.7324809741248104e-05, |
|
"loss": 2.4244, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.729436834094369e-05, |
|
"loss": 2.2928, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.7263926940639275e-05, |
|
"loss": 2.2732, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.7233485540334854e-05, |
|
"loss": 2.4067, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.7203044140030446e-05, |
|
"loss": 2.3029, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.717260273972603e-05, |
|
"loss": 2.3482, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.714216133942162e-05, |
|
"loss": 2.2777, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7111719939117203e-05, |
|
"loss": 2.2948, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.708127853881279e-05, |
|
"loss": 2.3709, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7050837138508375e-05, |
|
"loss": 2.3924, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.702039573820396e-05, |
|
"loss": 2.3185, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.6989954337899546e-05, |
|
"loss": 2.3042, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.695951293759513e-05, |
|
"loss": 2.3062, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.692907153729072e-05, |
|
"loss": 2.2792, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.68986301369863e-05, |
|
"loss": 2.2511, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.686818873668189e-05, |
|
"loss": 2.2938, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.683774733637748e-05, |
|
"loss": 2.3167, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.680730593607306e-05, |
|
"loss": 2.3586, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6776864535768646e-05, |
|
"loss": 2.2705, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.674642313546423e-05, |
|
"loss": 2.1603, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6715981735159824e-05, |
|
"loss": 2.3594, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.668554033485541e-05, |
|
"loss": 2.308, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6655098934550995e-05, |
|
"loss": 2.3469, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6624657534246574e-05, |
|
"loss": 2.3153, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.659421613394217e-05, |
|
"loss": 2.3836, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.656377473363775e-05, |
|
"loss": 2.2617, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.653333333333334e-05, |
|
"loss": 2.3834, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6502891933028924e-05, |
|
"loss": 2.2582, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.647245053272451e-05, |
|
"loss": 2.2326, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6442009132420095e-05, |
|
"loss": 2.2157, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.641156773211568e-05, |
|
"loss": 2.2988, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.6381126331811267e-05, |
|
"loss": 2.2304, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.635068493150685e-05, |
|
"loss": 2.1554, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.632024353120244e-05, |
|
"loss": 2.2901, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6289802130898024e-05, |
|
"loss": 2.1346, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.625936073059361e-05, |
|
"loss": 2.2658, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.62289193302892e-05, |
|
"loss": 2.2892, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.619847792998478e-05, |
|
"loss": 2.4276, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.6168036529680366e-05, |
|
"loss": 2.3805, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.613759512937595e-05, |
|
"loss": 2.3277, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.6107153729071544e-05, |
|
"loss": 2.253, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.607671232876713e-05, |
|
"loss": 2.2018, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.604627092846271e-05, |
|
"loss": 2.3574, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6015829528158295e-05, |
|
"loss": 2.3147, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.598538812785389e-05, |
|
"loss": 2.247, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.595494672754947e-05, |
|
"loss": 1.9512, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.592450532724506e-05, |
|
"loss": 2.0597, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.5894063926940644e-05, |
|
"loss": 2.0195, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.586362252663623e-05, |
|
"loss": 1.9563, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.5833181126331816e-05, |
|
"loss": 1.9845, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.58027397260274e-05, |
|
"loss": 2.0274, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.577229832572299e-05, |
|
"loss": 2.051, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.574185692541857e-05, |
|
"loss": 1.9961, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.571141552511416e-05, |
|
"loss": 1.9761, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5680974124809744e-05, |
|
"loss": 2.0424, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.565053272450533e-05, |
|
"loss": 1.9622, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.5620091324200915e-05, |
|
"loss": 1.992, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.55896499238965e-05, |
|
"loss": 2.0852, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.555920852359209e-05, |
|
"loss": 2.1086, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.552876712328767e-05, |
|
"loss": 2.0231, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.5498325722983265e-05, |
|
"loss": 1.9472, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.546788432267885e-05, |
|
"loss": 1.9655, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.543744292237443e-05, |
|
"loss": 2.04, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.5407001522070015e-05, |
|
"loss": 2.1005, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.537656012176561e-05, |
|
"loss": 2.1219, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.534611872146119e-05, |
|
"loss": 2.046, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.531567732115678e-05, |
|
"loss": 2.0985, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.528523592085236e-05, |
|
"loss": 2.0795, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.525479452054795e-05, |
|
"loss": 2.0962, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.5224353120243536e-05, |
|
"loss": 2.052, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.519391171993912e-05, |
|
"loss": 2.1188, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.516347031963471e-05, |
|
"loss": 2.0075, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.513302891933029e-05, |
|
"loss": 2.0194, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.510258751902588e-05, |
|
"loss": 2.0319, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.5072146118721464e-05, |
|
"loss": 1.9817, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.504170471841705e-05, |
|
"loss": 2.0719, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.5011263318112636e-05, |
|
"loss": 1.9931, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.498082191780822e-05, |
|
"loss": 1.9983, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.495038051750381e-05, |
|
"loss": 2.1237, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.491993911719939e-05, |
|
"loss": 2.011, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.4889497716894985e-05, |
|
"loss": 2.0294, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.4859056316590564e-05, |
|
"loss": 2.063, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.482861491628615e-05, |
|
"loss": 2.0753, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.4798173515981735e-05, |
|
"loss": 2.0859, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.476773211567733e-05, |
|
"loss": 1.9657, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.4737290715372914e-05, |
|
"loss": 2.0901, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.47068493150685e-05, |
|
"loss": 2.0066, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.467640791476408e-05, |
|
"loss": 2.0134, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.464596651445967e-05, |
|
"loss": 2.0645, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.4615525114155256e-05, |
|
"loss": 2.0559, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.458508371385084e-05, |
|
"loss": 2.1435, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.455464231354643e-05, |
|
"loss": 1.9392, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.452420091324201e-05, |
|
"loss": 2.0839, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.44937595129376e-05, |
|
"loss": 2.1076, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.4463318112633185e-05, |
|
"loss": 2.0119, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.443287671232877e-05, |
|
"loss": 2.1092, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.4402435312024356e-05, |
|
"loss": 2.0383, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.437199391171994e-05, |
|
"loss": 2.1265, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.434155251141553e-05, |
|
"loss": 2.07, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.431111111111111e-05, |
|
"loss": 2.0152, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.42806697108067e-05, |
|
"loss": 2.008, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.4250228310502284e-05, |
|
"loss": 1.9992, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.421978691019787e-05, |
|
"loss": 2.0191, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.4189345509893456e-05, |
|
"loss": 2.0505, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.415890410958904e-05, |
|
"loss": 2.0542, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.4128462709284634e-05, |
|
"loss": 2.1455, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.409802130898021e-05, |
|
"loss": 1.9989, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.40675799086758e-05, |
|
"loss": 2.0734, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.4037138508371384e-05, |
|
"loss": 1.9921, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.400669710806698e-05, |
|
"loss": 2.1307, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.397625570776256e-05, |
|
"loss": 2.0824, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.394581430745815e-05, |
|
"loss": 1.9928, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.391537290715373e-05, |
|
"loss": 2.0448, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.388493150684932e-05, |
|
"loss": 2.0779, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.3854490106544905e-05, |
|
"loss": 2.0343, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.382404870624049e-05, |
|
"loss": 1.9332, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.3793607305936076e-05, |
|
"loss": 2.1341, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.376316590563166e-05, |
|
"loss": 1.978, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.373272450532725e-05, |
|
"loss": 1.9498, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.3702283105022834e-05, |
|
"loss": 1.9063, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.367184170471842e-05, |
|
"loss": 2.0495, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.3641400304414005e-05, |
|
"loss": 2.02, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.361095890410959e-05, |
|
"loss": 1.973, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.3580517503805176e-05, |
|
"loss": 1.96, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.355007610350076e-05, |
|
"loss": 2.0725, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.351963470319635e-05, |
|
"loss": 2.1469, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.348919330289193e-05, |
|
"loss": 2.0721, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.345875190258752e-05, |
|
"loss": 2.1235, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.3428310502283105e-05, |
|
"loss": 1.9994, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.33978691019787e-05, |
|
"loss": 2.0654, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.336742770167428e-05, |
|
"loss": 2.0628, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.333698630136986e-05, |
|
"loss": 2.0541, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.330654490106545e-05, |
|
"loss": 2.0388, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.327610350076104e-05, |
|
"loss": 1.9874, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.3245662100456625e-05, |
|
"loss": 2.0574, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.321522070015221e-05, |
|
"loss": 1.9692, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.31847792998478e-05, |
|
"loss": 1.932, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.315433789954338e-05, |
|
"loss": 1.8747, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.312389649923897e-05, |
|
"loss": 2.0928, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.3093455098934554e-05, |
|
"loss": 2.1114, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.306301369863014e-05, |
|
"loss": 2.0353, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.3032572298325725e-05, |
|
"loss": 1.9485, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.300213089802131e-05, |
|
"loss": 2.0028, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.2971689497716897e-05, |
|
"loss": 2.026, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.294124809741248e-05, |
|
"loss": 2.1633, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.291080669710807e-05, |
|
"loss": 2.0141, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.2880365296803654e-05, |
|
"loss": 1.9247, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.284992389649924e-05, |
|
"loss": 2.005, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.2819482496194825e-05, |
|
"loss": 2.0607, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.278904109589042e-05, |
|
"loss": 2.0251, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.2758599695585996e-05, |
|
"loss": 2.1067, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.272815829528158e-05, |
|
"loss": 1.97, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.269771689497717e-05, |
|
"loss": 2.0772, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.266727549467276e-05, |
|
"loss": 2.0554, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.2636834094368346e-05, |
|
"loss": 1.9819, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.260639269406393e-05, |
|
"loss": 2.0674, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.257595129375951e-05, |
|
"loss": 2.0516, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.25455098934551e-05, |
|
"loss": 2.0866, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.251506849315069e-05, |
|
"loss": 1.9859, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.2484627092846274e-05, |
|
"loss": 1.9645, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.245418569254186e-05, |
|
"loss": 2.0103, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.2423744292237446e-05, |
|
"loss": 2.052, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.239330289193303e-05, |
|
"loss": 2.0134, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.236286149162862e-05, |
|
"loss": 2.025, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.23324200913242e-05, |
|
"loss": 2.0338, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.230197869101979e-05, |
|
"loss": 2.0614, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.2271537290715374e-05, |
|
"loss": 1.9784, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.224109589041096e-05, |
|
"loss": 1.9976, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.2210654490106545e-05, |
|
"loss": 2.0695, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.218021308980214e-05, |
|
"loss": 1.8607, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.214977168949772e-05, |
|
"loss": 2.0003, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.21193302891933e-05, |
|
"loss": 1.9529, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.208888888888889e-05, |
|
"loss": 1.9775, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.205844748858448e-05, |
|
"loss": 2.1202, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.2028006088280066e-05, |
|
"loss": 2.0416, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.1997564687975645e-05, |
|
"loss": 1.939, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.196712328767123e-05, |
|
"loss": 1.8248, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.193668188736682e-05, |
|
"loss": 1.768, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.190624048706241e-05, |
|
"loss": 1.7417, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.1875799086757995e-05, |
|
"loss": 1.787, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.184657534246576e-05, |
|
"loss": 1.8256, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.181613394216134e-05, |
|
"loss": 1.7764, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.1785692541856925e-05, |
|
"loss": 1.7467, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.175525114155251e-05, |
|
"loss": 1.7261, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.17248097412481e-05, |
|
"loss": 1.7825, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.169436834094369e-05, |
|
"loss": 1.7536, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.1663926940639274e-05, |
|
"loss": 1.7507, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.1633485540334853e-05, |
|
"loss": 1.7534, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.1603044140030446e-05, |
|
"loss": 1.6879, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.157260273972603e-05, |
|
"loss": 1.6767, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.154216133942162e-05, |
|
"loss": 1.7718, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.15117199391172e-05, |
|
"loss": 1.7115, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.148127853881279e-05, |
|
"loss": 1.7024, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.1450837138508374e-05, |
|
"loss": 1.7389, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.142039573820396e-05, |
|
"loss": 1.7123, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.1389954337899546e-05, |
|
"loss": 1.835, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.135951293759513e-05, |
|
"loss": 1.7925, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.132907153729072e-05, |
|
"loss": 1.7361, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.12986301369863e-05, |
|
"loss": 1.775, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.126818873668189e-05, |
|
"loss": 1.699, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.123774733637748e-05, |
|
"loss": 1.8377, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.120730593607306e-05, |
|
"loss": 1.6677, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.1176864535768645e-05, |
|
"loss": 1.7617, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.114642313546423e-05, |
|
"loss": 1.7213, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.1115981735159824e-05, |
|
"loss": 1.8376, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.108554033485541e-05, |
|
"loss": 1.792, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.105509893455099e-05, |
|
"loss": 1.8017, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.1024657534246574e-05, |
|
"loss": 1.7817, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.0994216133942166e-05, |
|
"loss": 1.8186, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.096377473363775e-05, |
|
"loss": 1.7081, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.093333333333334e-05, |
|
"loss": 1.7725, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.090289193302892e-05, |
|
"loss": 1.8943, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.087245053272451e-05, |
|
"loss": 1.8723, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.0842009132420095e-05, |
|
"loss": 1.8089, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.081156773211568e-05, |
|
"loss": 1.7635, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.0781126331811266e-05, |
|
"loss": 1.7497, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.075068493150685e-05, |
|
"loss": 1.738, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.072024353120244e-05, |
|
"loss": 1.7198, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.068980213089802e-05, |
|
"loss": 1.7462, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.065936073059361e-05, |
|
"loss": 1.8347, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.0628919330289194e-05, |
|
"loss": 1.8402, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.059847792998478e-05, |
|
"loss": 1.8079, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.0568036529680366e-05, |
|
"loss": 1.7424, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.053759512937595e-05, |
|
"loss": 1.7832, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.050715372907154e-05, |
|
"loss": 1.7963, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.0477929984779303e-05, |
|
"loss": 1.8091, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.044748858447489e-05, |
|
"loss": 1.6953, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.0417047184170478e-05, |
|
"loss": 1.8256, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.038660578386606e-05, |
|
"loss": 1.8461, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.0356164383561646e-05, |
|
"loss": 1.6912, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.032572298325723e-05, |
|
"loss": 1.7326, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.029528158295282e-05, |
|
"loss": 1.7778, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.0264840182648406e-05, |
|
"loss": 1.7841, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.023439878234399e-05, |
|
"loss": 1.8748, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.0203957382039574e-05, |
|
"loss": 1.7683, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.0173515981735163e-05, |
|
"loss": 1.8536, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.014307458143075e-05, |
|
"loss": 1.865, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.0112633181126334e-05, |
|
"loss": 1.7282, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.008219178082192e-05, |
|
"loss": 1.8071, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.005175038051751e-05, |
|
"loss": 1.7984, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.002130898021309e-05, |
|
"loss": 1.8904, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.9990867579908677e-05, |
|
"loss": 1.8645, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.9960426179604263e-05, |
|
"loss": 1.8624, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.9929984779299852e-05, |
|
"loss": 1.7069, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9899543378995438e-05, |
|
"loss": 1.8513, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9869101978691023e-05, |
|
"loss": 1.695, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.9838660578386606e-05, |
|
"loss": 1.789, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9808219178082195e-05, |
|
"loss": 1.8332, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.977777777777778e-05, |
|
"loss": 1.781, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9747336377473366e-05, |
|
"loss": 1.7331, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.971689497716895e-05, |
|
"loss": 1.7355, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.968645357686454e-05, |
|
"loss": 1.7736, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.9656012176560126e-05, |
|
"loss": 1.8316, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.962557077625571e-05, |
|
"loss": 1.7823, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.9595129375951294e-05, |
|
"loss": 1.8136, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.9564687975646883e-05, |
|
"loss": 1.8344, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.953424657534247e-05, |
|
"loss": 1.8676, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.9503805175038055e-05, |
|
"loss": 1.8041, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.9473363774733637e-05, |
|
"loss": 1.7538, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.944292237442923e-05, |
|
"loss": 1.7551, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.9412480974124812e-05, |
|
"loss": 1.8191, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.9382039573820398e-05, |
|
"loss": 1.6856, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.9351598173515983e-05, |
|
"loss": 1.8142, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.9321156773211572e-05, |
|
"loss": 1.7193, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.9290715372907158e-05, |
|
"loss": 1.8532, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.926027397260274e-05, |
|
"loss": 1.7633, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.9229832572298326e-05, |
|
"loss": 1.8355, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9199391171993915e-05, |
|
"loss": 1.7773, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.91689497716895e-05, |
|
"loss": 1.8063, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.9138508371385086e-05, |
|
"loss": 1.8132, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.9108066971080672e-05, |
|
"loss": 1.8179, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.907762557077626e-05, |
|
"loss": 1.8025, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.9047184170471843e-05, |
|
"loss": 1.9039, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.901674277016743e-05, |
|
"loss": 1.8192, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.8986301369863015e-05, |
|
"loss": 1.7935, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.8955859969558604e-05, |
|
"loss": 1.8275, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.892541856925419e-05, |
|
"loss": 1.818, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.8894977168949775e-05, |
|
"loss": 1.8142, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.8864535768645357e-05, |
|
"loss": 1.8139, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.8834094368340947e-05, |
|
"loss": 1.8355, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.8803652968036532e-05, |
|
"loss": 1.7334, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.8773211567732118e-05, |
|
"loss": 1.7533, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.8742770167427704e-05, |
|
"loss": 1.7938, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.8712328767123293e-05, |
|
"loss": 1.7383, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.868188736681888e-05, |
|
"loss": 1.7569, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.865144596651446e-05, |
|
"loss": 1.891, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8621004566210046e-05, |
|
"loss": 1.8103, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8590563165905635e-05, |
|
"loss": 1.8334, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.856012176560122e-05, |
|
"loss": 1.7458, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.8529680365296807e-05, |
|
"loss": 1.8501, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.849923896499239e-05, |
|
"loss": 1.7494, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.846879756468798e-05, |
|
"loss": 1.7331, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.8438356164383564e-05, |
|
"loss": 1.8252, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.840791476407915e-05, |
|
"loss": 1.808, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.8377473363774735e-05, |
|
"loss": 1.7964, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.8347031963470324e-05, |
|
"loss": 1.8049, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.831659056316591e-05, |
|
"loss": 1.8175, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.8286149162861492e-05, |
|
"loss": 1.768, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.8255707762557078e-05, |
|
"loss": 1.8176, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.8225266362252667e-05, |
|
"loss": 1.7983, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.8194824961948253e-05, |
|
"loss": 1.8383, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.8164383561643838e-05, |
|
"loss": 1.77, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.8133942161339424e-05, |
|
"loss": 1.8007, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.8103500761035013e-05, |
|
"loss": 1.7941, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.8073059360730595e-05, |
|
"loss": 1.8826, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.804261796042618e-05, |
|
"loss": 1.8164, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.8012176560121767e-05, |
|
"loss": 1.8422, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.7981735159817356e-05, |
|
"loss": 1.607, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.795129375951294e-05, |
|
"loss": 1.5504, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.7920852359208527e-05, |
|
"loss": 1.5181, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.789041095890411e-05, |
|
"loss": 1.5856, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.78599695585997e-05, |
|
"loss": 1.4975, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.7829528158295284e-05, |
|
"loss": 1.5684, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.779908675799087e-05, |
|
"loss": 1.6031, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.7768645357686455e-05, |
|
"loss": 1.5398, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.7738203957382045e-05, |
|
"loss": 1.5564, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.770776255707763e-05, |
|
"loss": 1.5395, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.7677321156773213e-05, |
|
"loss": 1.5022, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.7646879756468798e-05, |
|
"loss": 1.4697, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.7616438356164387e-05, |
|
"loss": 1.5704, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.7585996955859973e-05, |
|
"loss": 1.5571, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.755555555555556e-05, |
|
"loss": 1.5742, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.752511415525114e-05, |
|
"loss": 1.5312, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 2.7494672754946733e-05, |
|
"loss": 1.4847, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.7464231354642316e-05, |
|
"loss": 1.5724, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.74337899543379e-05, |
|
"loss": 1.5509, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.7403348554033487e-05, |
|
"loss": 1.5007, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.7372907153729076e-05, |
|
"loss": 1.5213, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.7342465753424662e-05, |
|
"loss": 1.5834, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.7312024353120244e-05, |
|
"loss": 1.5101, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.728158295281583e-05, |
|
"loss": 1.5733, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.725114155251142e-05, |
|
"loss": 1.5903, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.7220700152207005e-05, |
|
"loss": 1.5721, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.719025875190259e-05, |
|
"loss": 1.5293, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.7159817351598176e-05, |
|
"loss": 1.6016, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.7129375951293765e-05, |
|
"loss": 1.4873, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.7098934550989347e-05, |
|
"loss": 1.6021, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.7068493150684933e-05, |
|
"loss": 1.6195, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.703805175038052e-05, |
|
"loss": 1.6156, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2.7007610350076108e-05, |
|
"loss": 1.5061, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.6977168949771693e-05, |
|
"loss": 1.5679, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.694672754946728e-05, |
|
"loss": 1.6087, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.691628614916286e-05, |
|
"loss": 1.5767, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.688584474885845e-05, |
|
"loss": 1.5589, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 2.6855403348554036e-05, |
|
"loss": 1.5384, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.6824961948249622e-05, |
|
"loss": 1.5174, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.6794520547945207e-05, |
|
"loss": 1.585, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 2.6764079147640796e-05, |
|
"loss": 1.5766, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.6733637747336382e-05, |
|
"loss": 1.5724, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.6703196347031964e-05, |
|
"loss": 1.5789, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.667275494672755e-05, |
|
"loss": 1.566, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.664231354642314e-05, |
|
"loss": 1.5702, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 2.6611872146118725e-05, |
|
"loss": 1.5913, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 2.658143074581431e-05, |
|
"loss": 1.5544, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 2.6550989345509893e-05, |
|
"loss": 1.4748, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.6520547945205485e-05, |
|
"loss": 1.5538, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.6490106544901068e-05, |
|
"loss": 1.5493, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.6459665144596653e-05, |
|
"loss": 1.5631, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.642922374429224e-05, |
|
"loss": 1.6231, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 2.6398782343987828e-05, |
|
"loss": 1.5592, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.6368340943683414e-05, |
|
"loss": 1.4599, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2.6337899543378996e-05, |
|
"loss": 1.5991, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2.630745814307458e-05, |
|
"loss": 1.5998, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.627701674277017e-05, |
|
"loss": 1.5227, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.6246575342465756e-05, |
|
"loss": 1.5894, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.6216133942161342e-05, |
|
"loss": 1.5064, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.6185692541856928e-05, |
|
"loss": 1.5878, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.6155251141552517e-05, |
|
"loss": 1.608, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.61248097412481e-05, |
|
"loss": 1.6658, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 2.6094368340943685e-05, |
|
"loss": 1.5905, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 2.606392694063927e-05, |
|
"loss": 1.6711, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 2.603348554033486e-05, |
|
"loss": 1.5699, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.6003044140030445e-05, |
|
"loss": 1.6112, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.597260273972603e-05, |
|
"loss": 1.5901, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.5942161339421613e-05, |
|
"loss": 1.5447, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.5911719939117202e-05, |
|
"loss": 1.5636, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.5881278538812788e-05, |
|
"loss": 1.6046, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.5850837138508374e-05, |
|
"loss": 1.5229, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.582039573820396e-05, |
|
"loss": 1.6017, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.578995433789955e-05, |
|
"loss": 1.5262, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.5759512937595134e-05, |
|
"loss": 1.519, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.5729071537290716e-05, |
|
"loss": 1.5743, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.5698630136986302e-05, |
|
"loss": 1.566, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.566818873668189e-05, |
|
"loss": 1.5854, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.5637747336377477e-05, |
|
"loss": 1.6292, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.5607305936073062e-05, |
|
"loss": 1.6367, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.5576864535768645e-05, |
|
"loss": 1.6502, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.5546423135464237e-05, |
|
"loss": 1.6127, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.551598173515982e-05, |
|
"loss": 1.5351, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.5485540334855405e-05, |
|
"loss": 1.5574, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.545509893455099e-05, |
|
"loss": 1.6821, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.542465753424658e-05, |
|
"loss": 1.4367, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.5394216133942166e-05, |
|
"loss": 1.6209, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.5363774733637748e-05, |
|
"loss": 1.5987, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.5333333333333334e-05, |
|
"loss": 1.5591, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.5302891933028923e-05, |
|
"loss": 1.6609, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.527245053272451e-05, |
|
"loss": 1.5506, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.5242009132420094e-05, |
|
"loss": 1.6036, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.521156773211568e-05, |
|
"loss": 1.6555, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.518112633181127e-05, |
|
"loss": 1.6392, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.515068493150685e-05, |
|
"loss": 1.6013, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.5120243531202437e-05, |
|
"loss": 1.6, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.5089802130898022e-05, |
|
"loss": 1.5195, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.505936073059361e-05, |
|
"loss": 1.5349, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5028919330289197e-05, |
|
"loss": 1.6064, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.4998477929984783e-05, |
|
"loss": 1.5887, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.4969254185692542e-05, |
|
"loss": 1.6195, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.493881278538813e-05, |
|
"loss": 1.614, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.4908371385083717e-05, |
|
"loss": 1.6406, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.4877929984779302e-05, |
|
"loss": 1.5978, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.4847488584474885e-05, |
|
"loss": 1.662, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4817047184170474e-05, |
|
"loss": 1.5427, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.478660578386606e-05, |
|
"loss": 1.6268, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.4756164383561645e-05, |
|
"loss": 1.6355, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.472572298325723e-05, |
|
"loss": 1.5728, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.469528158295282e-05, |
|
"loss": 1.6309, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.4664840182648405e-05, |
|
"loss": 1.5316, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.4634398782343988e-05, |
|
"loss": 1.55, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.4603957382039573e-05, |
|
"loss": 1.6023, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 2.4573515981735162e-05, |
|
"loss": 1.5844, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.4543074581430748e-05, |
|
"loss": 1.6453, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.4512633181126334e-05, |
|
"loss": 1.509, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.448219178082192e-05, |
|
"loss": 1.5422, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.445175038051751e-05, |
|
"loss": 1.6115, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.442130898021309e-05, |
|
"loss": 1.5477, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.4390867579908677e-05, |
|
"loss": 1.6476, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.4360426179604262e-05, |
|
"loss": 1.6505, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.432998477929985e-05, |
|
"loss": 1.5546, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.4299543378995437e-05, |
|
"loss": 1.5796, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.426910197869102e-05, |
|
"loss": 1.5756, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.4238660578386605e-05, |
|
"loss": 1.5754, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.4208219178082194e-05, |
|
"loss": 1.5819, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.417777777777778e-05, |
|
"loss": 1.5826, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.4147336377473365e-05, |
|
"loss": 1.6289, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.411689497716895e-05, |
|
"loss": 1.5559, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.408645357686454e-05, |
|
"loss": 1.6191, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.4056012176560122e-05, |
|
"loss": 1.5491, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.4025570776255708e-05, |
|
"loss": 1.6345, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.3995129375951294e-05, |
|
"loss": 1.4829, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.3964687975646883e-05, |
|
"loss": 1.4067, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 2.393424657534247e-05, |
|
"loss": 1.3637, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.3903805175038054e-05, |
|
"loss": 1.2731, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.3873363774733636e-05, |
|
"loss": 1.3727, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 2.3842922374429226e-05, |
|
"loss": 1.2957, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.381248097412481e-05, |
|
"loss": 1.3658, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.3782039573820397e-05, |
|
"loss": 1.4263, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.3751598173515983e-05, |
|
"loss": 1.36, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.372115677321157e-05, |
|
"loss": 1.2446, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.3690715372907157e-05, |
|
"loss": 1.3264, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.366027397260274e-05, |
|
"loss": 1.3395, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.3629832572298325e-05, |
|
"loss": 1.3421, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.3599391171993914e-05, |
|
"loss": 1.4379, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.35689497716895e-05, |
|
"loss": 1.3355, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.3538508371385086e-05, |
|
"loss": 1.4226, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.3508066971080668e-05, |
|
"loss": 1.4034, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.347762557077626e-05, |
|
"loss": 1.3015, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.3447184170471843e-05, |
|
"loss": 1.3953, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.341674277016743e-05, |
|
"loss": 1.3534, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.3386301369863014e-05, |
|
"loss": 1.3378, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.3355859969558603e-05, |
|
"loss": 1.345, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 2.332541856925419e-05, |
|
"loss": 1.4098, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.329497716894977e-05, |
|
"loss": 1.3938, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.3264535768645357e-05, |
|
"loss": 1.4573, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 2.3234094368340946e-05, |
|
"loss": 1.396, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.320365296803653e-05, |
|
"loss": 1.3915, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.3173211567732117e-05, |
|
"loss": 1.4149, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.3142770167427703e-05, |
|
"loss": 1.4299, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.3112328767123292e-05, |
|
"loss": 1.388, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.3081887366818874e-05, |
|
"loss": 1.4334, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.305144596651446e-05, |
|
"loss": 1.358, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.3021004566210046e-05, |
|
"loss": 1.3849, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.2990563165905635e-05, |
|
"loss": 1.2529, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.296012176560122e-05, |
|
"loss": 1.406, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.2929680365296806e-05, |
|
"loss": 1.3589, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.289923896499239e-05, |
|
"loss": 1.3382, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.2868797564687977e-05, |
|
"loss": 1.4394, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.2838356164383563e-05, |
|
"loss": 1.4327, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.280791476407915e-05, |
|
"loss": 1.3955, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.2777473363774735e-05, |
|
"loss": 1.38, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.2747031963470324e-05, |
|
"loss": 1.3289, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.271659056316591e-05, |
|
"loss": 1.3282, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.268614916286149e-05, |
|
"loss": 1.4225, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.2655707762557077e-05, |
|
"loss": 1.358, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.2625266362252666e-05, |
|
"loss": 1.3493, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.2594824961948252e-05, |
|
"loss": 1.4606, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.2564383561643838e-05, |
|
"loss": 1.3846, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.253394216133942e-05, |
|
"loss": 1.3729, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.2503500761035012e-05, |
|
"loss": 1.3396, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.2473059360730595e-05, |
|
"loss": 1.3955, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.244261796042618e-05, |
|
"loss": 1.3225, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.2412176560121766e-05, |
|
"loss": 1.3583, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.2381735159817355e-05, |
|
"loss": 1.3146, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.235129375951294e-05, |
|
"loss": 1.4239, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.2320852359208523e-05, |
|
"loss": 1.3779, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.229041095890411e-05, |
|
"loss": 1.4361, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2259969558599698e-05, |
|
"loss": 1.3766, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2229528158295284e-05, |
|
"loss": 1.3609, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.219908675799087e-05, |
|
"loss": 1.4332, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.2168645357686455e-05, |
|
"loss": 1.4219, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.2138203957382044e-05, |
|
"loss": 1.4023, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.2107762557077626e-05, |
|
"loss": 1.4445, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.2077321156773212e-05, |
|
"loss": 1.3996, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.2048097412480978e-05, |
|
"loss": 1.3752, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.2017656012176563e-05, |
|
"loss": 1.3732, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.198721461187215e-05, |
|
"loss": 1.4234, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.195677321156773e-05, |
|
"loss": 1.3971, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 2.192633181126332e-05, |
|
"loss": 1.3729, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 2.1895890410958906e-05, |
|
"loss": 1.3405, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 2.1865449010654492e-05, |
|
"loss": 1.3572, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 2.1835007610350077e-05, |
|
"loss": 1.3331, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 2.1804566210045667e-05, |
|
"loss": 1.368, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.1774124809741252e-05, |
|
"loss": 1.3649, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.1743683409436835e-05, |
|
"loss": 1.389, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.171324200913242e-05, |
|
"loss": 1.4163, |
|
"step": 15025 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.168280060882801e-05, |
|
"loss": 1.3442, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 2.1652359208523595e-05, |
|
"loss": 1.3446, |
|
"step": 15075 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 2.162191780821918e-05, |
|
"loss": 1.3772, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 2.1591476407914763e-05, |
|
"loss": 1.4261, |
|
"step": 15125 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 2.1561035007610352e-05, |
|
"loss": 1.4058, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.1530593607305938e-05, |
|
"loss": 1.3876, |
|
"step": 15175 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.1500152207001523e-05, |
|
"loss": 1.3427, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.146971080669711e-05, |
|
"loss": 1.3778, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 2.1439269406392698e-05, |
|
"loss": 1.3358, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.1408828006088284e-05, |
|
"loss": 1.4508, |
|
"step": 15275 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 2.1378386605783866e-05, |
|
"loss": 1.3823, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.1347945205479452e-05, |
|
"loss": 1.4207, |
|
"step": 15325 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.131750380517504e-05, |
|
"loss": 1.3759, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.1287062404870626e-05, |
|
"loss": 1.4415, |
|
"step": 15375 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.1256621004566212e-05, |
|
"loss": 1.3673, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.1226179604261798e-05, |
|
"loss": 1.4189, |
|
"step": 15425 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.1195738203957387e-05, |
|
"loss": 1.3827, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.116529680365297e-05, |
|
"loss": 1.3898, |
|
"step": 15475 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.1134855403348555e-05, |
|
"loss": 1.444, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.110441400304414e-05, |
|
"loss": 1.3941, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.107397260273973e-05, |
|
"loss": 1.4336, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.1043531202435315e-05, |
|
"loss": 1.4159, |
|
"step": 15575 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.10130898021309e-05, |
|
"loss": 1.351, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.0982648401826483e-05, |
|
"loss": 1.336, |
|
"step": 15625 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.0952207001522072e-05, |
|
"loss": 1.4279, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.0921765601217658e-05, |
|
"loss": 1.3384, |
|
"step": 15675 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.0891324200913244e-05, |
|
"loss": 1.3338, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.086088280060883e-05, |
|
"loss": 1.3422, |
|
"step": 15725 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.083044140030442e-05, |
|
"loss": 1.4765, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.08e-05, |
|
"loss": 1.4493, |
|
"step": 15775 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.0769558599695586e-05, |
|
"loss": 1.4215, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 2.0739117199391172e-05, |
|
"loss": 1.4274, |
|
"step": 15825 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 2.070867579908676e-05, |
|
"loss": 1.4245, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 2.0678234398782347e-05, |
|
"loss": 1.4564, |
|
"step": 15875 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.0647792998477933e-05, |
|
"loss": 1.3672, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.0617351598173515e-05, |
|
"loss": 1.4063, |
|
"step": 15925 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.0586910197869104e-05, |
|
"loss": 1.4454, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.055646879756469e-05, |
|
"loss": 1.4042, |
|
"step": 15975 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.0526027397260275e-05, |
|
"loss": 1.3176, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.049558599695586e-05, |
|
"loss": 1.3937, |
|
"step": 16025 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.046514459665145e-05, |
|
"loss": 1.4184, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.0434703196347036e-05, |
|
"loss": 1.3941, |
|
"step": 16075 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.0404261796042618e-05, |
|
"loss": 1.3824, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 2.0373820395738204e-05, |
|
"loss": 1.3031, |
|
"step": 16125 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.0343378995433793e-05, |
|
"loss": 1.3937, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.031293759512938e-05, |
|
"loss": 1.3045, |
|
"step": 16175 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.0282496194824964e-05, |
|
"loss": 1.4681, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.025205479452055e-05, |
|
"loss": 1.4129, |
|
"step": 16225 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.022161339421614e-05, |
|
"loss": 1.4236, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.019117199391172e-05, |
|
"loss": 1.4638, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 2.0160730593607307e-05, |
|
"loss": 1.4065, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.0130289193302892e-05, |
|
"loss": 1.395, |
|
"step": 16325 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.009984779299848e-05, |
|
"loss": 1.441, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.0069406392694067e-05, |
|
"loss": 1.4944, |
|
"step": 16375 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.003896499238965e-05, |
|
"loss": 1.4018, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.0008523592085235e-05, |
|
"loss": 1.4352, |
|
"step": 16425 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 1.9978082191780824e-05, |
|
"loss": 1.1718, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 1.994764079147641e-05, |
|
"loss": 1.2079, |
|
"step": 16475 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 1.9917199391171996e-05, |
|
"loss": 1.1671, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.988675799086758e-05, |
|
"loss": 1.1194, |
|
"step": 16525 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 1.9856316590563167e-05, |
|
"loss": 1.2303, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 1.9825875190258753e-05, |
|
"loss": 1.1932, |
|
"step": 16575 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 1.979543378995434e-05, |
|
"loss": 1.2365, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 1.9764992389649927e-05, |
|
"loss": 1.1352, |
|
"step": 16625 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.973455098934551e-05, |
|
"loss": 1.205, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.97041095890411e-05, |
|
"loss": 1.2005, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.9673668188736684e-05, |
|
"loss": 1.2812, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 1.964322678843227e-05, |
|
"loss": 1.1773, |
|
"step": 16725 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1.9612785388127856e-05, |
|
"loss": 1.1904, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.958234398782344e-05, |
|
"loss": 1.2193, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.9551902587519027e-05, |
|
"loss": 1.1983, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 1.9521461187214613e-05, |
|
"loss": 1.1261, |
|
"step": 16825 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 1.94910197869102e-05, |
|
"loss": 1.2459, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 1.9460578386605788e-05, |
|
"loss": 1.2696, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 1.943013698630137e-05, |
|
"loss": 1.1993, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 1.939969558599696e-05, |
|
"loss": 1.1595, |
|
"step": 16925 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 1.9369254185692545e-05, |
|
"loss": 1.1438, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 1.933881278538813e-05, |
|
"loss": 1.2023, |
|
"step": 16975 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.9308371385083716e-05, |
|
"loss": 1.2327, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.92779299847793e-05, |
|
"loss": 1.2655, |
|
"step": 17025 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.9247488584474887e-05, |
|
"loss": 1.1495, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 1.9217047184170473e-05, |
|
"loss": 1.2121, |
|
"step": 17075 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.918660578386606e-05, |
|
"loss": 1.2187, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.9156164383561648e-05, |
|
"loss": 1.2401, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1.912572298325723e-05, |
|
"loss": 1.1772, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.909528158295282e-05, |
|
"loss": 1.2103, |
|
"step": 17175 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.90648401826484e-05, |
|
"loss": 1.2021, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.9035616438356167e-05, |
|
"loss": 1.1839, |
|
"step": 17225 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.9005175038051753e-05, |
|
"loss": 1.2107, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 1.897473363774734e-05, |
|
"loss": 1.2437, |
|
"step": 17275 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 1.8944292237442924e-05, |
|
"loss": 1.2521, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 1.8915068493150687e-05, |
|
"loss": 1.1999, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 1.8884627092846272e-05, |
|
"loss": 1.1996, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 1.8854185692541858e-05, |
|
"loss": 1.245, |
|
"step": 17375 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 1.8823744292237444e-05, |
|
"loss": 1.2139, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 1.879330289193303e-05, |
|
"loss": 1.196, |
|
"step": 17425 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 1.8762861491628615e-05, |
|
"loss": 1.267, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.87324200913242e-05, |
|
"loss": 1.2152, |
|
"step": 17475 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.870197869101979e-05, |
|
"loss": 1.2098, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.8671537290715372e-05, |
|
"loss": 1.2526, |
|
"step": 17525 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 1.864109589041096e-05, |
|
"loss": 1.1557, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 1.8610654490106547e-05, |
|
"loss": 1.2998, |
|
"step": 17575 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 1.8580213089802133e-05, |
|
"loss": 1.192, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 1.8549771689497718e-05, |
|
"loss": 1.1579, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 1.8519330289193304e-05, |
|
"loss": 1.2424, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 1.848888888888889e-05, |
|
"loss": 1.2178, |
|
"step": 17675 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.8458447488584475e-05, |
|
"loss": 1.2272, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 1.842800608828006e-05, |
|
"loss": 1.2794, |
|
"step": 17725 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 1.839756468797565e-05, |
|
"loss": 1.1844, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.8367123287671232e-05, |
|
"loss": 1.2341, |
|
"step": 17775 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 1.833668188736682e-05, |
|
"loss": 1.1533, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 1.8306240487062407e-05, |
|
"loss": 1.2264, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 1.8275799086757993e-05, |
|
"loss": 1.222, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 1.824535768645358e-05, |
|
"loss": 1.1864, |
|
"step": 17875 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1.8214916286149164e-05, |
|
"loss": 1.1615, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.818447488584475e-05, |
|
"loss": 1.2101, |
|
"step": 17925 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.8154033485540335e-05, |
|
"loss": 1.2291, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 1.812359208523592e-05, |
|
"loss": 1.1934, |
|
"step": 17975 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 1.809315068493151e-05, |
|
"loss": 1.1713, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 1.8062709284627092e-05, |
|
"loss": 1.1374, |
|
"step": 18025 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 1.803226788432268e-05, |
|
"loss": 1.1841, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 1.8001826484018264e-05, |
|
"loss": 1.2757, |
|
"step": 18075 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 1.7971385083713853e-05, |
|
"loss": 1.225, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.794094368340944e-05, |
|
"loss": 1.1889, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1.7910502283105024e-05, |
|
"loss": 1.2067, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 1.788006088280061e-05, |
|
"loss": 1.2683, |
|
"step": 18175 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 1.7849619482496196e-05, |
|
"loss": 1.194, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1.781917808219178e-05, |
|
"loss": 1.2623, |
|
"step": 18225 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 1.7788736681887367e-05, |
|
"loss": 1.2462, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 1.7758295281582953e-05, |
|
"loss": 1.2488, |
|
"step": 18275 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 1.7727853881278542e-05, |
|
"loss": 1.1575, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 1.7697412480974124e-05, |
|
"loss": 1.3261, |
|
"step": 18325 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 1.7666971080669713e-05, |
|
"loss": 1.1545, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 1.76365296803653e-05, |
|
"loss": 1.2144, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 1.7606088280060884e-05, |
|
"loss": 1.2061, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 1.757564687975647e-05, |
|
"loss": 1.2538, |
|
"step": 18425 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 1.7545205479452056e-05, |
|
"loss": 1.238, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 1.751476407914764e-05, |
|
"loss": 1.1711, |
|
"step": 18475 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.7484322678843227e-05, |
|
"loss": 1.2815, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 1.7453881278538813e-05, |
|
"loss": 1.2561, |
|
"step": 18525 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.7423439878234402e-05, |
|
"loss": 1.2703, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.7392998477929984e-05, |
|
"loss": 1.2548, |
|
"step": 18575 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.7362557077625573e-05, |
|
"loss": 1.213, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 1.733211567732116e-05, |
|
"loss": 1.2203, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.7301674277016745e-05, |
|
"loss": 1.2282, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.727123287671233e-05, |
|
"loss": 1.1951, |
|
"step": 18675 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 1.7240791476407916e-05, |
|
"loss": 1.2652, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1.72103500761035e-05, |
|
"loss": 1.1692, |
|
"step": 18725 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 1.7179908675799087e-05, |
|
"loss": 1.2708, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 1.7149467275494673e-05, |
|
"loss": 1.1725, |
|
"step": 18775 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 1.7119025875190262e-05, |
|
"loss": 1.1644, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 1.7088584474885844e-05, |
|
"loss": 1.2263, |
|
"step": 18825 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 1.7058143074581433e-05, |
|
"loss": 1.2848, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 1.7027701674277016e-05, |
|
"loss": 1.2118, |
|
"step": 18875 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 1.6997260273972605e-05, |
|
"loss": 1.2332, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.696681887366819e-05, |
|
"loss": 1.1837, |
|
"step": 18925 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 1.6936377473363776e-05, |
|
"loss": 1.2719, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.6905936073059362e-05, |
|
"loss": 1.1825, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.6875494672754948e-05, |
|
"loss": 1.2161, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.6845053272450533e-05, |
|
"loss": 1.2578, |
|
"step": 19025 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.681461187214612e-05, |
|
"loss": 1.2, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 1.6784170471841705e-05, |
|
"loss": 1.2705, |
|
"step": 19075 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 1.6753729071537294e-05, |
|
"loss": 1.2378, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 1.6723287671232876e-05, |
|
"loss": 1.2475, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1.6692846270928465e-05, |
|
"loss": 1.2566, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.666240487062405e-05, |
|
"loss": 1.3268, |
|
"step": 19175 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.6631963470319636e-05, |
|
"loss": 1.244, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 1.6601522070015222e-05, |
|
"loss": 1.2251, |
|
"step": 19225 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 1.6571080669710808e-05, |
|
"loss": 1.1907, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 1.6540639269406393e-05, |
|
"loss": 1.1661, |
|
"step": 19275 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1.651019786910198e-05, |
|
"loss": 1.1985, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1.6479756468797565e-05, |
|
"loss": 1.1985, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 1.6449315068493154e-05, |
|
"loss": 1.2457, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 1.6418873668188736e-05, |
|
"loss": 1.1869, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 1.6388432267884325e-05, |
|
"loss": 1.1581, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 1.635799086757991e-05, |
|
"loss": 1.2958, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 1.6327549467275497e-05, |
|
"loss": 1.1531, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 1.6297108066971082e-05, |
|
"loss": 1.2319, |
|
"step": 19475 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 1.6266666666666668e-05, |
|
"loss": 1.2317, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 1.6236225266362254e-05, |
|
"loss": 1.1822, |
|
"step": 19525 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 1.620578386605784e-05, |
|
"loss": 1.2323, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 1.6175342465753425e-05, |
|
"loss": 1.2421, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 1.6144901065449014e-05, |
|
"loss": 1.1909, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 1.6114459665144596e-05, |
|
"loss": 1.1956, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 1.6084018264840185e-05, |
|
"loss": 1.2423, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 1.6053576864535768e-05, |
|
"loss": 1.1934, |
|
"step": 19675 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.6023135464231357e-05, |
|
"loss": 1.2855, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.5992694063926942e-05, |
|
"loss": 1.1549, |
|
"step": 19725 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.5962252663622528e-05, |
|
"loss": 1.0158, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.5931811263318114e-05, |
|
"loss": 1.1329, |
|
"step": 19775 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.59013698630137e-05, |
|
"loss": 1.0144, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.5870928462709285e-05, |
|
"loss": 1.0526, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.584048706240487e-05, |
|
"loss": 1.0858, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.5810045662100456e-05, |
|
"loss": 1.0668, |
|
"step": 19875 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.5779604261796046e-05, |
|
"loss": 1.0202, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.5749162861491628e-05, |
|
"loss": 1.0321, |
|
"step": 19925 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.5718721461187217e-05, |
|
"loss": 1.0682, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.5688280060882803e-05, |
|
"loss": 1.0644, |
|
"step": 19975 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.5657838660578388e-05, |
|
"loss": 1.1266, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.5627397260273974e-05, |
|
"loss": 1.0861, |
|
"step": 20025 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.559695585996956e-05, |
|
"loss": 1.0446, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.5566514459665145e-05, |
|
"loss": 1.0408, |
|
"step": 20075 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1.553607305936073e-05, |
|
"loss": 1.0181, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.5505631659056317e-05, |
|
"loss": 1.0721, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.5475190258751906e-05, |
|
"loss": 1.08, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.5444748858447488e-05, |
|
"loss": 1.1263, |
|
"step": 20175 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.5414307458143077e-05, |
|
"loss": 1.0331, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.5383866057838663e-05, |
|
"loss": 1.0047, |
|
"step": 20225 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.535342465753425e-05, |
|
"loss": 1.0146, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.5322983257229834e-05, |
|
"loss": 1.0828, |
|
"step": 20275 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 1.529254185692542e-05, |
|
"loss": 1.0656, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.5262100456621006e-05, |
|
"loss": 1.0661, |
|
"step": 20325 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.5231659056316593e-05, |
|
"loss": 1.0858, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.5201217656012177e-05, |
|
"loss": 1.0975, |
|
"step": 20375 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 1.5170776255707764e-05, |
|
"loss": 1.0874, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 1.514033485540335e-05, |
|
"loss": 1.061, |
|
"step": 20425 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.5109893455098936e-05, |
|
"loss": 1.0563, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.5079452054794521e-05, |
|
"loss": 1.0779, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.5049010654490109e-05, |
|
"loss": 1.0346, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.5018569254185693e-05, |
|
"loss": 1.0741, |
|
"step": 20525 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.498812785388128e-05, |
|
"loss": 1.0446, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.4957686453576866e-05, |
|
"loss": 1.0447, |
|
"step": 20575 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.4927245053272451e-05, |
|
"loss": 1.0887, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 1.4896803652968037e-05, |
|
"loss": 1.0644, |
|
"step": 20625 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.4866362252663624e-05, |
|
"loss": 1.0262, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.4835920852359208e-05, |
|
"loss": 1.0814, |
|
"step": 20675 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 1.4805479452054796e-05, |
|
"loss": 1.0796, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 1.4775038051750381e-05, |
|
"loss": 1.0907, |
|
"step": 20725 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.4744596651445969e-05, |
|
"loss": 1.0584, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.4714155251141553e-05, |
|
"loss": 1.0504, |
|
"step": 20775 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.468371385083714e-05, |
|
"loss": 1.0383, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 1.4653272450532726e-05, |
|
"loss": 1.0387, |
|
"step": 20825 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.4622831050228312e-05, |
|
"loss": 1.0644, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.4592389649923897e-05, |
|
"loss": 1.008, |
|
"step": 20875 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.4561948249619485e-05, |
|
"loss": 1.086, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 1.4531506849315069e-05, |
|
"loss": 1.0496, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 1.4501065449010656e-05, |
|
"loss": 0.9918, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.4470624048706242e-05, |
|
"loss": 1.0559, |
|
"step": 20975 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.4440182648401827e-05, |
|
"loss": 1.0467, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.4409741248097413e-05, |
|
"loss": 1.0557, |
|
"step": 21025 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.4379299847793e-05, |
|
"loss": 1.0846, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.4348858447488584e-05, |
|
"loss": 1.0836, |
|
"step": 21075 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.4318417047184172e-05, |
|
"loss": 1.082, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.4287975646879757e-05, |
|
"loss": 1.0596, |
|
"step": 21125 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 1.4257534246575345e-05, |
|
"loss": 1.0321, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.4227092846270929e-05, |
|
"loss": 1.0805, |
|
"step": 21175 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.4196651445966516e-05, |
|
"loss": 1.1013, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.4166210045662102e-05, |
|
"loss": 1.0287, |
|
"step": 21225 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.4135768645357688e-05, |
|
"loss": 1.0995, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.4105327245053273e-05, |
|
"loss": 1.0834, |
|
"step": 21275 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.407488584474886e-05, |
|
"loss": 1.0702, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.4045662100456623e-05, |
|
"loss": 1.0709, |
|
"step": 21325 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1.4015220700152209e-05, |
|
"loss": 1.126, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.3984779299847794e-05, |
|
"loss": 1.0661, |
|
"step": 21375 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.395433789954338e-05, |
|
"loss": 1.0154, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 1.3923896499238967e-05, |
|
"loss": 1.0203, |
|
"step": 21425 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.3893455098934551e-05, |
|
"loss": 1.0997, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 1.3863013698630139e-05, |
|
"loss": 1.1286, |
|
"step": 21475 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 1.3832572298325724e-05, |
|
"loss": 1.0367, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.380213089802131e-05, |
|
"loss": 1.0712, |
|
"step": 21525 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.3771689497716896e-05, |
|
"loss": 1.0636, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.3741248097412483e-05, |
|
"loss": 1.056, |
|
"step": 21575 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1.3710806697108067e-05, |
|
"loss": 1.107, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1.3680365296803655e-05, |
|
"loss": 1.1084, |
|
"step": 21625 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.364992389649924e-05, |
|
"loss": 1.0469, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.3619482496194828e-05, |
|
"loss": 1.0499, |
|
"step": 21675 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 1.3589041095890412e-05, |
|
"loss": 1.0193, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 1.3558599695585999e-05, |
|
"loss": 1.0213, |
|
"step": 21725 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.3528158295281583e-05, |
|
"loss": 1.0763, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 1.349771689497717e-05, |
|
"loss": 1.0602, |
|
"step": 21775 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 1.3467275494672756e-05, |
|
"loss": 1.0075, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 1.3436834094368343e-05, |
|
"loss": 1.0394, |
|
"step": 21825 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.3406392694063927e-05, |
|
"loss": 1.1018, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 1.3375951293759515e-05, |
|
"loss": 1.1407, |
|
"step": 21875 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.33455098934551e-05, |
|
"loss": 1.0651, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.3316286149162863e-05, |
|
"loss": 1.1539, |
|
"step": 21925 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 1.328584474885845e-05, |
|
"loss": 1.1164, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.3255403348554034e-05, |
|
"loss": 1.0859, |
|
"step": 21975 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.3224961948249621e-05, |
|
"loss": 1.0339, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.3194520547945207e-05, |
|
"loss": 1.0881, |
|
"step": 22025 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 1.3164079147640793e-05, |
|
"loss": 1.0617, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.3133637747336379e-05, |
|
"loss": 1.0946, |
|
"step": 22075 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 1.3103196347031966e-05, |
|
"loss": 1.0516, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.307275494672755e-05, |
|
"loss": 1.0097, |
|
"step": 22125 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.3042313546423137e-05, |
|
"loss": 0.9982, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.3011872146118723e-05, |
|
"loss": 1.0149, |
|
"step": 22175 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.298143074581431e-05, |
|
"loss": 1.0674, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.2950989345509894e-05, |
|
"loss": 1.0485, |
|
"step": 22225 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.2920547945205482e-05, |
|
"loss": 1.0641, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.2890106544901066e-05, |
|
"loss": 1.0306, |
|
"step": 22275 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 1.2859665144596653e-05, |
|
"loss": 1.0526, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.2829223744292239e-05, |
|
"loss": 1.1053, |
|
"step": 22325 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.2798782343987823e-05, |
|
"loss": 1.069, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.276834094368341e-05, |
|
"loss": 1.0654, |
|
"step": 22375 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.2737899543378996e-05, |
|
"loss": 1.0341, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.2707458143074583e-05, |
|
"loss": 1.0856, |
|
"step": 22425 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.2677016742770167e-05, |
|
"loss": 1.0456, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 1.2646575342465755e-05, |
|
"loss": 1.0782, |
|
"step": 22475 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1.261613394216134e-05, |
|
"loss": 1.0911, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 1.2585692541856926e-05, |
|
"loss": 1.0151, |
|
"step": 22525 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 1.2555251141552512e-05, |
|
"loss": 1.1133, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.2524809741248099e-05, |
|
"loss": 1.098, |
|
"step": 22575 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1.2494368340943683e-05, |
|
"loss": 1.0943, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 1.246392694063927e-05, |
|
"loss": 1.0622, |
|
"step": 22625 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 1.2433485540334856e-05, |
|
"loss": 1.0801, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.2403044140030442e-05, |
|
"loss": 1.0496, |
|
"step": 22675 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 1.2372602739726027e-05, |
|
"loss": 1.1017, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1.2342161339421615e-05, |
|
"loss": 1.045, |
|
"step": 22725 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 1.2311719939117199e-05, |
|
"loss": 1.1071, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 1.2281278538812786e-05, |
|
"loss": 1.0852, |
|
"step": 22775 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.2250837138508372e-05, |
|
"loss": 1.1576, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.2220395738203959e-05, |
|
"loss": 1.079, |
|
"step": 22825 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.2189954337899543e-05, |
|
"loss": 1.1521, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.215951293759513e-05, |
|
"loss": 1.0449, |
|
"step": 22875 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 1.2129071537290714e-05, |
|
"loss": 1.1417, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 1.2098630136986302e-05, |
|
"loss": 1.0472, |
|
"step": 22925 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.2068188736681888e-05, |
|
"loss": 1.0093, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.2037747336377475e-05, |
|
"loss": 1.0782, |
|
"step": 22975 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.2007305936073059e-05, |
|
"loss": 1.0471, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.1976864535768646e-05, |
|
"loss": 0.9486, |
|
"step": 23025 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.1946423135464232e-05, |
|
"loss": 0.974, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.1915981735159818e-05, |
|
"loss": 0.9259, |
|
"step": 23075 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.1885540334855403e-05, |
|
"loss": 0.928, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.185509893455099e-05, |
|
"loss": 0.9293, |
|
"step": 23125 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.1824657534246575e-05, |
|
"loss": 0.9387, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.1794216133942162e-05, |
|
"loss": 0.8687, |
|
"step": 23175 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.1763774733637748e-05, |
|
"loss": 0.9467, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.1733333333333335e-05, |
|
"loss": 0.8817, |
|
"step": 23225 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.1702891933028919e-05, |
|
"loss": 0.9304, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1672450532724506e-05, |
|
"loss": 0.9319, |
|
"step": 23275 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.164200913242009e-05, |
|
"loss": 0.9488, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.1611567732115678e-05, |
|
"loss": 0.9054, |
|
"step": 23325 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.1581126331811263e-05, |
|
"loss": 0.9246, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.155068493150685e-05, |
|
"loss": 0.898, |
|
"step": 23375 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.1520243531202435e-05, |
|
"loss": 0.947, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 1.1489802130898022e-05, |
|
"loss": 0.9288, |
|
"step": 23425 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.1459360730593608e-05, |
|
"loss": 0.9544, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.1428919330289194e-05, |
|
"loss": 0.9711, |
|
"step": 23475 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.139847792998478e-05, |
|
"loss": 0.9133, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 1.1368036529680367e-05, |
|
"loss": 0.9182, |
|
"step": 23525 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 1.133759512937595e-05, |
|
"loss": 0.8882, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.1307153729071538e-05, |
|
"loss": 0.9431, |
|
"step": 23575 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.1276712328767124e-05, |
|
"loss": 0.9347, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.1246270928462711e-05, |
|
"loss": 0.9397, |
|
"step": 23625 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.1215829528158295e-05, |
|
"loss": 0.8936, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.1185388127853882e-05, |
|
"loss": 0.9422, |
|
"step": 23675 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.1154946727549466e-05, |
|
"loss": 0.994, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 1.1124505327245054e-05, |
|
"loss": 0.9458, |
|
"step": 23725 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.109406392694064e-05, |
|
"loss": 0.9833, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 1.1063622526636227e-05, |
|
"loss": 0.9156, |
|
"step": 23775 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.103318112633181e-05, |
|
"loss": 0.952, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.1002739726027398e-05, |
|
"loss": 0.9329, |
|
"step": 23825 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 1.0972298325722984e-05, |
|
"loss": 0.969, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.094185692541857e-05, |
|
"loss": 0.9036, |
|
"step": 23875 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.0911415525114155e-05, |
|
"loss": 0.893, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.0880974124809743e-05, |
|
"loss": 1.0085, |
|
"step": 23925 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 1.0850532724505327e-05, |
|
"loss": 0.92, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.0820091324200914e-05, |
|
"loss": 0.928, |
|
"step": 23975 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 1.07896499238965e-05, |
|
"loss": 0.9017, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 1.0759208523592087e-05, |
|
"loss": 0.9032, |
|
"step": 24025 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 1.0728767123287671e-05, |
|
"loss": 0.9073, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 1.0698325722983258e-05, |
|
"loss": 0.9447, |
|
"step": 24075 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.0667884322678842e-05, |
|
"loss": 0.9341, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.0638660578386606e-05, |
|
"loss": 0.9369, |
|
"step": 24125 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.0608219178082194e-05, |
|
"loss": 0.9721, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.0577777777777778e-05, |
|
"loss": 0.9367, |
|
"step": 24175 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.0547336377473365e-05, |
|
"loss": 0.9746, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.0516894977168949e-05, |
|
"loss": 0.914, |
|
"step": 24225 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 1.0486453576864537e-05, |
|
"loss": 0.8861, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.0456012176560122e-05, |
|
"loss": 0.9416, |
|
"step": 24275 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 1.042557077625571e-05, |
|
"loss": 0.9378, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 1.0395129375951294e-05, |
|
"loss": 0.9149, |
|
"step": 24325 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.0364687975646881e-05, |
|
"loss": 0.9661, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1.0334246575342467e-05, |
|
"loss": 0.9946, |
|
"step": 24375 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 1.0303805175038052e-05, |
|
"loss": 0.9153, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.0273363774733638e-05, |
|
"loss": 1.0122, |
|
"step": 24425 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.0242922374429225e-05, |
|
"loss": 0.9563, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 1.021248097412481e-05, |
|
"loss": 0.9451, |
|
"step": 24475 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 1.0182039573820397e-05, |
|
"loss": 0.9543, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.0151598173515982e-05, |
|
"loss": 0.9336, |
|
"step": 24525 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.012115677321157e-05, |
|
"loss": 0.9474, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.0090715372907154e-05, |
|
"loss": 0.9503, |
|
"step": 24575 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.0060273972602741e-05, |
|
"loss": 0.9395, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.0029832572298325e-05, |
|
"loss": 0.9076, |
|
"step": 24625 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 9.999391171993912e-06, |
|
"loss": 0.9026, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 9.968949771689498e-06, |
|
"loss": 0.9039, |
|
"step": 24675 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 9.938508371385086e-06, |
|
"loss": 0.9744, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 9.908066971080671e-06, |
|
"loss": 0.9818, |
|
"step": 24725 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 9.877625570776257e-06, |
|
"loss": 0.8859, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 9.847184170471843e-06, |
|
"loss": 0.9602, |
|
"step": 24775 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 9.816742770167428e-06, |
|
"loss": 0.9405, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.786301369863016e-06, |
|
"loss": 0.9669, |
|
"step": 24825 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 9.755859969558601e-06, |
|
"loss": 0.9106, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 9.725418569254187e-06, |
|
"loss": 0.9118, |
|
"step": 24875 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 9.694977168949773e-06, |
|
"loss": 0.981, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 9.664535768645358e-06, |
|
"loss": 0.974, |
|
"step": 24925 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 9.634094368340946e-06, |
|
"loss": 0.8843, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 9.603652968036531e-06, |
|
"loss": 0.9714, |
|
"step": 24975 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.573211567732117e-06, |
|
"loss": 0.9101, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 9.542770167427703e-06, |
|
"loss": 0.9264, |
|
"step": 25025 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 9.512328767123288e-06, |
|
"loss": 0.9459, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 9.481887366818874e-06, |
|
"loss": 0.9391, |
|
"step": 25075 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 9.451445966514461e-06, |
|
"loss": 0.9473, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 9.421004566210047e-06, |
|
"loss": 0.897, |
|
"step": 25125 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 9.390563165905633e-06, |
|
"loss": 0.9351, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 9.360121765601219e-06, |
|
"loss": 0.9249, |
|
"step": 25175 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 9.329680365296804e-06, |
|
"loss": 0.9407, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 9.299238964992392e-06, |
|
"loss": 0.9331, |
|
"step": 25225 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 9.268797564687977e-06, |
|
"loss": 0.9749, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 9.238356164383563e-06, |
|
"loss": 0.981, |
|
"step": 25275 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 9.207914764079149e-06, |
|
"loss": 0.9334, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 9.177473363774734e-06, |
|
"loss": 0.928, |
|
"step": 25325 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 9.147031963470322e-06, |
|
"loss": 0.8981, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 9.116590563165907e-06, |
|
"loss": 0.9735, |
|
"step": 25375 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 9.086149162861493e-06, |
|
"loss": 0.9042, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 9.055707762557079e-06, |
|
"loss": 0.9443, |
|
"step": 25425 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 9.025266362252664e-06, |
|
"loss": 0.9571, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 8.99482496194825e-06, |
|
"loss": 0.9154, |
|
"step": 25475 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 8.964383561643837e-06, |
|
"loss": 0.9131, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 8.933942161339423e-06, |
|
"loss": 0.9096, |
|
"step": 25525 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 8.903500761035009e-06, |
|
"loss": 1.0131, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 8.873059360730594e-06, |
|
"loss": 0.9257, |
|
"step": 25575 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 8.84261796042618e-06, |
|
"loss": 0.9627, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 8.812176560121768e-06, |
|
"loss": 0.9599, |
|
"step": 25625 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 8.781735159817353e-06, |
|
"loss": 1.0309, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 8.751293759512939e-06, |
|
"loss": 0.9223, |
|
"step": 25675 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 8.720852359208525e-06, |
|
"loss": 0.9609, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 8.69041095890411e-06, |
|
"loss": 0.9848, |
|
"step": 25725 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 8.659969558599698e-06, |
|
"loss": 0.9744, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 8.629528158295283e-06, |
|
"loss": 0.9079, |
|
"step": 25775 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 8.599086757990869e-06, |
|
"loss": 0.9524, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 8.568645357686455e-06, |
|
"loss": 0.8825, |
|
"step": 25825 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 8.53820395738204e-06, |
|
"loss": 0.9648, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 8.507762557077626e-06, |
|
"loss": 0.9268, |
|
"step": 25875 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 8.477321156773213e-06, |
|
"loss": 1.0055, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 8.446879756468799e-06, |
|
"loss": 0.9859, |
|
"step": 25925 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 8.416438356164385e-06, |
|
"loss": 0.876, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 8.38599695585997e-06, |
|
"loss": 0.9929, |
|
"step": 25975 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 8.355555555555556e-06, |
|
"loss": 0.937, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 8.325114155251143e-06, |
|
"loss": 0.9504, |
|
"step": 26025 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 8.29467275494673e-06, |
|
"loss": 0.9475, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 8.264231354642315e-06, |
|
"loss": 0.8728, |
|
"step": 26075 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 8.2337899543379e-06, |
|
"loss": 0.9089, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 8.203348554033486e-06, |
|
"loss": 0.963, |
|
"step": 26125 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 8.172907153729072e-06, |
|
"loss": 0.9398, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 8.14246575342466e-06, |
|
"loss": 0.9664, |
|
"step": 26175 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 8.112024353120245e-06, |
|
"loss": 0.9756, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 8.08158295281583e-06, |
|
"loss": 0.9686, |
|
"step": 26225 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 8.051141552511416e-06, |
|
"loss": 0.9906, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 8.020700152207002e-06, |
|
"loss": 0.9644, |
|
"step": 26275 |
|
}, |
|
{
"epoch": 8.01,
"learning_rate": 7.99025875190259e-06,
"loss": 0.8352,
"step": 26300
},
{
"epoch": 8.01,
"learning_rate": 7.959817351598175e-06,
"loss": 0.7675,
"step": 26325
},
{
"epoch": 8.02,
"learning_rate": 7.92937595129376e-06,
"loss": 0.8487,
"step": 26350
},
{
"epoch": 8.03,
"learning_rate": 7.898934550989346e-06,
"loss": 0.8241,
"step": 26375
},
{
"epoch": 8.04,
"learning_rate": 7.868493150684932e-06,
"loss": 0.8417,
"step": 26400
},
{
"epoch": 8.04,
"learning_rate": 7.83805175038052e-06,
"loss": 0.8277,
"step": 26425
},
{
"epoch": 8.05,
"learning_rate": 7.807610350076105e-06,
"loss": 0.8032,
"step": 26450
},
{
"epoch": 8.06,
"learning_rate": 7.77716894977169e-06,
"loss": 0.8111,
"step": 26475
},
{
"epoch": 8.07,
"learning_rate": 7.746727549467276e-06,
"loss": 0.8319,
"step": 26500
},
{
"epoch": 8.07,
"learning_rate": 7.716286149162862e-06,
"loss": 0.8437,
"step": 26525
},
{
"epoch": 8.08,
"learning_rate": 7.685844748858448e-06,
"loss": 0.8167,
"step": 26550
},
{
"epoch": 8.09,
"learning_rate": 7.655403348554035e-06,
"loss": 0.798,
"step": 26575
},
{
"epoch": 8.1,
"learning_rate": 7.624961948249621e-06,
"loss": 0.8465,
"step": 26600
},
{
"epoch": 8.11,
"learning_rate": 7.594520547945206e-06,
"loss": 0.8215,
"step": 26625
},
{
"epoch": 8.11,
"learning_rate": 7.564079147640791e-06,
"loss": 0.8385,
"step": 26650
},
{
"epoch": 8.12,
"learning_rate": 7.533637747336378e-06,
"loss": 0.8552,
"step": 26675
},
{
"epoch": 8.13,
"learning_rate": 7.503196347031964e-06,
"loss": 0.8127,
"step": 26700
},
{
"epoch": 8.14,
"learning_rate": 7.472754946727549e-06,
"loss": 0.8545,
"step": 26725
},
{
"epoch": 8.14,
"learning_rate": 7.442313546423136e-06,
"loss": 0.8589,
"step": 26750
},
{
"epoch": 8.15,
"learning_rate": 7.4118721461187215e-06,
"loss": 0.8231,
"step": 26775
},
{
"epoch": 8.16,
"learning_rate": 7.381430745814307e-06,
"loss": 0.8292,
"step": 26800
},
{
"epoch": 8.17,
"learning_rate": 7.350989345509894e-06,
"loss": 0.866,
"step": 26825
},
{
"epoch": 8.17,
"learning_rate": 7.320547945205479e-06,
"loss": 0.8413,
"step": 26850
},
{
"epoch": 8.18,
"learning_rate": 7.290106544901066e-06,
"loss": 0.8609,
"step": 26875
},
{
"epoch": 8.19,
"learning_rate": 7.259665144596652e-06,
"loss": 0.8566,
"step": 26900
},
{
"epoch": 8.2,
"learning_rate": 7.229223744292237e-06,
"loss": 0.9092,
"step": 26925
},
{
"epoch": 8.2,
"learning_rate": 7.198782343987824e-06,
"loss": 0.8238,
"step": 26950
},
{
"epoch": 8.21,
"learning_rate": 7.1683409436834095e-06,
"loss": 0.8372,
"step": 26975
},
{
"epoch": 8.22,
"learning_rate": 7.137899543378995e-06,
"loss": 0.8547,
"step": 27000
},
{
"epoch": 8.23,
"learning_rate": 7.107458143074582e-06,
"loss": 0.8121,
"step": 27025
},
{
"epoch": 8.23,
"learning_rate": 7.077016742770167e-06,
"loss": 0.8415,
"step": 27050
},
{
"epoch": 8.24,
"learning_rate": 7.046575342465753e-06,
"loss": 0.8953,
"step": 27075
},
{
"epoch": 8.25,
"learning_rate": 7.01613394216134e-06,
"loss": 0.8622,
"step": 27100
},
{
"epoch": 8.26,
"learning_rate": 6.985692541856925e-06,
"loss": 0.874,
"step": 27125
},
{
"epoch": 8.26,
"learning_rate": 6.955251141552512e-06,
"loss": 0.8017,
"step": 27150
},
{
"epoch": 8.27,
"learning_rate": 6.9248097412480975e-06,
"loss": 0.82,
"step": 27175
},
{
"epoch": 8.28,
"learning_rate": 6.894368340943683e-06,
"loss": 0.827,
"step": 27200
},
{
"epoch": 8.29,
"learning_rate": 6.86392694063927e-06,
"loss": 0.8299,
"step": 27225
},
{
"epoch": 8.3,
"learning_rate": 6.833485540334855e-06,
"loss": 0.8529,
"step": 27250
},
{
"epoch": 8.3,
"learning_rate": 6.803044140030441e-06,
"loss": 0.8119,
"step": 27275
},
{
"epoch": 8.31,
"learning_rate": 6.7726027397260276e-06,
"loss": 0.8448,
"step": 27300
},
{
"epoch": 8.32,
"learning_rate": 6.742161339421613e-06,
"loss": 0.824,
"step": 27325
},
{
"epoch": 8.33,
"learning_rate": 6.7117199391172e-06,
"loss": 0.8442,
"step": 27350
},
{
"epoch": 8.33,
"learning_rate": 6.6812785388127855e-06,
"loss": 0.8194,
"step": 27375
},
{
"epoch": 8.34,
"learning_rate": 6.650837138508371e-06,
"loss": 0.8177,
"step": 27400
},
{
"epoch": 8.35,
"learning_rate": 6.620395738203958e-06,
"loss": 0.873,
"step": 27425
},
{
"epoch": 8.36,
"learning_rate": 6.589954337899543e-06,
"loss": 0.7966,
"step": 27450
},
{
"epoch": 8.36,
"learning_rate": 6.559512937595129e-06,
"loss": 0.8656,
"step": 27475
},
{
"epoch": 8.37,
"learning_rate": 6.5290715372907155e-06,
"loss": 0.8272,
"step": 27500
},
{
"epoch": 8.38,
"learning_rate": 6.498630136986301e-06,
"loss": 0.7805,
"step": 27525
},
{
"epoch": 8.39,
"learning_rate": 6.468188736681888e-06,
"loss": 0.8527,
"step": 27550
},
{
"epoch": 8.39,
"learning_rate": 6.4377473363774734e-06,
"loss": 0.8507,
"step": 27575
},
{
"epoch": 8.4,
"learning_rate": 6.407305936073059e-06,
"loss": 0.7855,
"step": 27600
},
{
"epoch": 8.41,
"learning_rate": 6.376864535768646e-06,
"loss": 0.8705,
"step": 27625
},
{
"epoch": 8.42,
"learning_rate": 6.346423135464231e-06,
"loss": 0.8078,
"step": 27650
},
{
"epoch": 8.42,
"learning_rate": 6.315981735159817e-06,
"loss": 0.7914,
"step": 27675
},
{
"epoch": 8.43,
"learning_rate": 6.2855403348554035e-06,
"loss": 0.8536,
"step": 27700
},
{
"epoch": 8.44,
"learning_rate": 6.255098934550989e-06,
"loss": 0.8765,
"step": 27725
},
{
"epoch": 8.45,
"learning_rate": 6.224657534246576e-06,
"loss": 0.8406,
"step": 27750
},
{
"epoch": 8.46,
"learning_rate": 6.194216133942161e-06,
"loss": 0.8427,
"step": 27775
},
{
"epoch": 8.46,
"learning_rate": 6.163774733637747e-06,
"loss": 0.8689,
"step": 27800
},
{
"epoch": 8.47,
"learning_rate": 6.133333333333334e-06,
"loss": 0.8204,
"step": 27825
},
{
"epoch": 8.48,
"learning_rate": 6.102891933028919e-06,
"loss": 0.8447,
"step": 27850
},
{
"epoch": 8.49,
"learning_rate": 6.072450532724505e-06,
"loss": 0.8356,
"step": 27875
},
{
"epoch": 8.49,
"learning_rate": 6.0420091324200915e-06,
"loss": 0.8776,
"step": 27900
},
{
"epoch": 8.5,
"learning_rate": 6.011567732115677e-06,
"loss": 0.8543,
"step": 27925
},
{
"epoch": 8.51,
"learning_rate": 5.981126331811264e-06,
"loss": 0.8341,
"step": 27950
},
{
"epoch": 8.52,
"learning_rate": 5.950684931506849e-06,
"loss": 0.832,
"step": 27975
},
{
"epoch": 8.52,
"learning_rate": 5.920243531202435e-06,
"loss": 0.7922,
"step": 28000
},
{
"epoch": 8.53,
"learning_rate": 5.889802130898022e-06,
"loss": 0.8491,
"step": 28025
},
{
"epoch": 8.54,
"learning_rate": 5.859360730593607e-06,
"loss": 0.8911,
"step": 28050
},
{
"epoch": 8.55,
"learning_rate": 5.828919330289193e-06,
"loss": 0.8432,
"step": 28075
},
{
"epoch": 8.55,
"learning_rate": 5.7984779299847795e-06,
"loss": 0.7997,
"step": 28100
},
{
"epoch": 8.56,
"learning_rate": 5.768036529680365e-06,
"loss": 0.836,
"step": 28125
},
{
"epoch": 8.57,
"learning_rate": 5.737595129375952e-06,
"loss": 0.8332,
"step": 28150
},
{
"epoch": 8.58,
"learning_rate": 5.707153729071537e-06,
"loss": 0.8591,
"step": 28175
},
{
"epoch": 8.58,
"learning_rate": 5.676712328767123e-06,
"loss": 0.8475,
"step": 28200
},
{
"epoch": 8.59,
"learning_rate": 5.64627092846271e-06,
"loss": 0.8842,
"step": 28225
},
{
"epoch": 8.6,
"learning_rate": 5.615829528158295e-06,
"loss": 0.7811,
"step": 28250
},
{
"epoch": 8.61,
"learning_rate": 5.585388127853881e-06,
"loss": 0.8826,
"step": 28275
},
{
"epoch": 8.61,
"learning_rate": 5.556164383561644e-06,
"loss": 0.8344,
"step": 28300
},
{
"epoch": 8.62,
"learning_rate": 5.52572298325723e-06,
"loss": 0.8106,
"step": 28325
},
{
"epoch": 8.63,
"learning_rate": 5.495281582952816e-06,
"loss": 0.7875,
"step": 28350
},
{
"epoch": 8.64,
"learning_rate": 5.464840182648402e-06,
"loss": 0.83,
"step": 28375
},
{
"epoch": 8.65,
"learning_rate": 5.434398782343988e-06,
"loss": 0.8188,
"step": 28400
},
{
"epoch": 8.65,
"learning_rate": 5.403957382039574e-06,
"loss": 0.8448,
"step": 28425
},
{
"epoch": 8.66,
"learning_rate": 5.37351598173516e-06,
"loss": 0.7736,
"step": 28450
},
{
"epoch": 8.67,
"learning_rate": 5.3430745814307465e-06,
"loss": 0.8335,
"step": 28475
},
{
"epoch": 8.68,
"learning_rate": 5.312633181126332e-06,
"loss": 0.8536,
"step": 28500
},
{
"epoch": 8.68,
"learning_rate": 5.282191780821918e-06,
"loss": 0.8378,
"step": 28525
},
{
"epoch": 8.69,
"learning_rate": 5.251750380517504e-06,
"loss": 0.8067,
"step": 28550
},
{
"epoch": 8.7,
"learning_rate": 5.222526636225267e-06,
"loss": 0.8447,
"step": 28575
},
{
"epoch": 8.71,
"learning_rate": 5.192085235920853e-06,
"loss": 0.8222,
"step": 28600
},
{
"epoch": 8.71,
"learning_rate": 5.161643835616439e-06,
"loss": 0.813,
"step": 28625
},
{
"epoch": 8.72,
"learning_rate": 5.131202435312025e-06,
"loss": 0.8439,
"step": 28650
},
{
"epoch": 8.73,
"learning_rate": 5.100761035007611e-06,
"loss": 0.8635,
"step": 28675
},
{
"epoch": 8.74,
"learning_rate": 5.070319634703197e-06,
"loss": 0.8653,
"step": 28700
},
{
"epoch": 8.74,
"learning_rate": 5.0398782343987825e-06,
"loss": 0.827,
"step": 28725
},
{
"epoch": 8.75,
"learning_rate": 5.009436834094369e-06,
"loss": 0.8366,
"step": 28750
},
{
"epoch": 8.76,
"learning_rate": 4.978995433789955e-06,
"loss": 0.8199,
"step": 28775
},
{
"epoch": 8.77,
"learning_rate": 4.949771689497717e-06,
"loss": 0.9215,
"step": 28800
},
{
"epoch": 8.77,
"learning_rate": 4.919330289193303e-06,
"loss": 0.8776,
"step": 28825
},
{
"epoch": 8.78,
"learning_rate": 4.888888888888889e-06,
"loss": 0.8023,
"step": 28850
},
{
"epoch": 8.79,
"learning_rate": 4.858447488584475e-06,
"loss": 0.8622,
"step": 28875
},
{
"epoch": 8.8,
"learning_rate": 4.8280060882800615e-06,
"loss": 0.801,
"step": 28900
},
{
"epoch": 8.81,
"learning_rate": 4.797564687975647e-06,
"loss": 0.837,
"step": 28925
},
{
"epoch": 8.81,
"learning_rate": 4.767123287671233e-06,
"loss": 0.8399,
"step": 28950
},
{
"epoch": 8.82,
"learning_rate": 4.736681887366819e-06,
"loss": 0.804,
"step": 28975
},
{
"epoch": 8.83,
"learning_rate": 4.706240487062405e-06,
"loss": 0.8425,
"step": 29000
},
{
"epoch": 8.84,
"learning_rate": 4.675799086757991e-06,
"loss": 0.8786,
"step": 29025
},
{
"epoch": 8.84,
"learning_rate": 4.645357686453577e-06,
"loss": 0.9328,
"step": 29050
},
{
"epoch": 8.85,
"learning_rate": 4.614916286149163e-06,
"loss": 0.8182,
"step": 29075
},
{
"epoch": 8.86,
"learning_rate": 4.5844748858447495e-06,
"loss": 0.8446,
"step": 29100
},
{
"epoch": 8.87,
"learning_rate": 4.554033485540335e-06,
"loss": 0.8503,
"step": 29125
},
{
"epoch": 8.87,
"learning_rate": 4.523592085235921e-06,
"loss": 0.8258,
"step": 29150
},
{
"epoch": 8.88,
"learning_rate": 4.493150684931507e-06,
"loss": 0.8692,
"step": 29175
},
{
"epoch": 8.89,
"learning_rate": 4.462709284627093e-06,
"loss": 0.8569,
"step": 29200
},
{
"epoch": 8.9,
"learning_rate": 4.432267884322679e-06,
"loss": 0.8694,
"step": 29225
},
{
"epoch": 8.9,
"learning_rate": 4.401826484018265e-06,
"loss": 0.836,
"step": 29250
},
{
"epoch": 8.91,
"learning_rate": 4.371385083713851e-06,
"loss": 0.804,
"step": 29275
},
{
"epoch": 8.92,
"learning_rate": 4.340943683409437e-06,
"loss": 0.8593,
"step": 29300
},
{
"epoch": 8.93,
"learning_rate": 4.310502283105023e-06,
"loss": 0.8547,
"step": 29325
},
{
"epoch": 8.93,
"learning_rate": 4.280060882800609e-06,
"loss": 0.8705,
"step": 29350
},
{
"epoch": 8.94,
"learning_rate": 4.249619482496195e-06,
"loss": 0.867,
"step": 29375
},
{
"epoch": 8.95,
"learning_rate": 4.219178082191781e-06,
"loss": 0.796,
"step": 29400
},
{
"epoch": 8.96,
"learning_rate": 4.188736681887367e-06,
"loss": 0.8249,
"step": 29425
},
{
"epoch": 8.96,
"learning_rate": 4.158295281582953e-06,
"loss": 0.8244,
"step": 29450
},
{
"epoch": 8.97,
"learning_rate": 4.127853881278539e-06,
"loss": 0.8224,
"step": 29475
},
{
"epoch": 8.98,
"learning_rate": 4.097412480974125e-06,
"loss": 0.8259,
"step": 29500
},
{
"epoch": 8.99,
"learning_rate": 4.066971080669711e-06,
"loss": 0.8255,
"step": 29525
},
{
"epoch": 9.0,
"learning_rate": 4.036529680365297e-06,
"loss": 0.8501,
"step": 29550
},
{
"epoch": 9.0,
"learning_rate": 4.006088280060883e-06,
"loss": 0.7706,
"step": 29575
},
{
"epoch": 9.01,
"learning_rate": 3.975646879756469e-06,
"loss": 0.7943,
"step": 29600
},
{
"epoch": 9.02,
"learning_rate": 3.945205479452055e-06,
"loss": 0.7389,
"step": 29625
},
{
"epoch": 9.03,
"learning_rate": 3.914764079147641e-06,
"loss": 0.7088,
"step": 29650
},
{
"epoch": 9.03,
"learning_rate": 3.884322678843227e-06,
"loss": 0.772,
"step": 29675
},
{
"epoch": 9.04,
"learning_rate": 3.853881278538813e-06,
"loss": 0.7772,
"step": 29700
},
{
"epoch": 9.05,
"learning_rate": 3.823439878234399e-06,
"loss": 0.7484,
"step": 29725
},
{
"epoch": 9.06,
"learning_rate": 3.792998477929985e-06,
"loss": 0.7653,
"step": 29750
},
{
"epoch": 9.06,
"learning_rate": 3.762557077625571e-06,
"loss": 0.7537,
"step": 29775
},
{
"epoch": 9.07,
"learning_rate": 3.732115677321157e-06,
"loss": 0.7248,
"step": 29800
},
{
"epoch": 9.08,
"learning_rate": 3.701674277016743e-06,
"loss": 0.8037,
"step": 29825
},
{
"epoch": 9.09,
"learning_rate": 3.671232876712329e-06,
"loss": 0.7425,
"step": 29850
},
{
"epoch": 9.09,
"learning_rate": 3.640791476407915e-06,
"loss": 0.7592,
"step": 29875
},
{
"epoch": 9.1,
"learning_rate": 3.610350076103501e-06,
"loss": 0.7541,
"step": 29900
},
{
"epoch": 9.11,
"learning_rate": 3.579908675799087e-06,
"loss": 0.739,
"step": 29925
},
{
"epoch": 9.12,
"learning_rate": 3.549467275494673e-06,
"loss": 0.7581,
"step": 29950
},
{
"epoch": 9.12,
"learning_rate": 3.519025875190259e-06,
"loss": 0.8068,
"step": 29975
},
{
"epoch": 9.13,
"learning_rate": 3.488584474885845e-06,
"loss": 0.7699,
"step": 30000
},
{
"epoch": 9.14,
"learning_rate": 3.458143074581431e-06,
"loss": 0.725,
"step": 30025
},
{
"epoch": 9.15,
"learning_rate": 3.4277016742770168e-06,
"loss": 0.7598,
"step": 30050
},
{
"epoch": 9.16,
"learning_rate": 3.397260273972603e-06,
"loss": 0.726,
"step": 30075
},
{
"epoch": 9.16,
"learning_rate": 3.366818873668189e-06,
"loss": 0.7622,
"step": 30100
},
{
"epoch": 9.17,
"learning_rate": 3.336377473363775e-06,
"loss": 0.7633,
"step": 30125
},
{
"epoch": 9.18,
"learning_rate": 3.3059360730593608e-06,
"loss": 0.7839,
"step": 30150
},
{
"epoch": 9.19,
"learning_rate": 3.275494672754947e-06,
"loss": 0.758,
"step": 30175
},
{
"epoch": 9.19,
"learning_rate": 3.245053272450533e-06,
"loss": 0.7451,
"step": 30200
},
{
"epoch": 9.2,
"learning_rate": 3.214611872146119e-06,
"loss": 0.7402,
"step": 30225
},
{
"epoch": 9.21,
"learning_rate": 3.1841704718417048e-06,
"loss": 0.733,
"step": 30250
},
{
"epoch": 9.22,
"learning_rate": 3.153729071537291e-06,
"loss": 0.7695,
"step": 30275
},
{
"epoch": 9.22,
"learning_rate": 3.123287671232877e-06,
"loss": 0.7838,
"step": 30300
},
{
"epoch": 9.23,
"learning_rate": 3.092846270928463e-06,
"loss": 0.7946,
"step": 30325
},
{
"epoch": 9.24,
"learning_rate": 3.0624048706240488e-06,
"loss": 0.7699,
"step": 30350
},
{
"epoch": 9.25,
"learning_rate": 3.031963470319635e-06,
"loss": 0.7173,
"step": 30375
},
{
"epoch": 9.25,
"learning_rate": 3.001522070015221e-06,
"loss": 0.7182,
"step": 30400
},
{
"epoch": 9.26,
"learning_rate": 2.971080669710807e-06,
"loss": 0.7544,
"step": 30425
},
{
"epoch": 9.27,
"learning_rate": 2.9406392694063927e-06,
"loss": 0.7678,
"step": 30450
},
{
"epoch": 9.28,
"learning_rate": 2.910197869101979e-06,
"loss": 0.7222,
"step": 30475
},
{
"epoch": 9.28,
"learning_rate": 2.879756468797565e-06,
"loss": 0.7949,
"step": 30500
},
{
"epoch": 9.29,
"learning_rate": 2.849315068493151e-06,
"loss": 0.7523,
"step": 30525
},
{
"epoch": 9.3,
"learning_rate": 2.8188736681887367e-06,
"loss": 0.7809,
"step": 30550
},
{
"epoch": 9.31,
"learning_rate": 2.788432267884323e-06,
"loss": 0.7836,
"step": 30575
},
{
"epoch": 9.32,
"learning_rate": 2.757990867579909e-06,
"loss": 0.7481,
"step": 30600
},
{
"epoch": 9.32,
"learning_rate": 2.727549467275495e-06,
"loss": 0.756,
"step": 30625
},
{
"epoch": 9.33,
"learning_rate": 2.6971080669710807e-06,
"loss": 0.8018,
"step": 30650
},
{
"epoch": 9.34,
"learning_rate": 2.666666666666667e-06,
"loss": 0.7264,
"step": 30675
},
{
"epoch": 9.35,
"learning_rate": 2.636225266362253e-06,
"loss": 0.7553,
"step": 30700
},
{
"epoch": 9.35,
"learning_rate": 2.605783866057839e-06,
"loss": 0.8181,
"step": 30725
},
{
"epoch": 9.36,
"learning_rate": 2.5753424657534247e-06,
"loss": 0.8361,
"step": 30750
},
{
"epoch": 9.37,
"learning_rate": 2.544901065449011e-06,
"loss": 0.7377,
"step": 30775
},
{
"epoch": 9.38,
"learning_rate": 2.514459665144597e-06,
"loss": 0.7311,
"step": 30800
},
{
"epoch": 9.38,
"learning_rate": 2.484018264840183e-06,
"loss": 0.7555,
"step": 30825
},
{
"epoch": 9.39,
"learning_rate": 2.4535768645357687e-06,
"loss": 0.7609,
"step": 30850
},
{
"epoch": 9.4,
"learning_rate": 2.423135464231355e-06,
"loss": 0.7774,
"step": 30875
},
{
"epoch": 9.41,
"learning_rate": 2.392694063926941e-06,
"loss": 0.7941,
"step": 30900
},
{
"epoch": 9.41,
"learning_rate": 2.362252663622527e-06,
"loss": 0.782,
"step": 30925
},
{
"epoch": 9.42,
"learning_rate": 2.3318112633181127e-06,
"loss": 0.7627,
"step": 30950
},
{
"epoch": 9.43,
"learning_rate": 2.301369863013699e-06,
"loss": 0.7271,
"step": 30975
},
{
"epoch": 9.44,
"learning_rate": 2.270928462709285e-06,
"loss": 0.7764,
"step": 31000
},
{
"epoch": 9.44,
"learning_rate": 2.240487062404871e-06,
"loss": 0.8404,
"step": 31025
},
{
"epoch": 9.45,
"learning_rate": 2.2100456621004567e-06,
"loss": 0.7565,
"step": 31050
},
{
"epoch": 9.46,
"learning_rate": 2.179604261796043e-06,
"loss": 0.7917,
"step": 31075
},
{
"epoch": 9.47,
"learning_rate": 2.149162861491629e-06,
"loss": 0.7568,
"step": 31100
},
{
"epoch": 9.47,
"learning_rate": 2.1187214611872146e-06,
"loss": 0.7278,
"step": 31125
},
{
"epoch": 9.48,
"learning_rate": 2.0882800608828007e-06,
"loss": 0.8142,
"step": 31150
},
{
"epoch": 9.49,
"learning_rate": 2.0578386605783868e-06,
"loss": 0.791,
"step": 31175
},
{
"epoch": 9.5,
"learning_rate": 2.027397260273973e-06,
"loss": 0.7884,
"step": 31200
},
{
"epoch": 9.51,
"learning_rate": 1.9969558599695586e-06,
"loss": 0.8056,
"step": 31225
},
{
"epoch": 9.51,
"learning_rate": 1.9665144596651447e-06,
"loss": 0.7537,
"step": 31250
},
{
"epoch": 9.52,
"learning_rate": 1.9360730593607308e-06,
"loss": 0.7794,
"step": 31275
},
{
"epoch": 9.53,
"learning_rate": 1.9056316590563167e-06,
"loss": 0.8168,
"step": 31300
},
{
"epoch": 9.54,
"learning_rate": 1.8751902587519028e-06,
"loss": 0.756,
"step": 31325
},
{
"epoch": 9.54,
"learning_rate": 1.8447488584474887e-06,
"loss": 0.7625,
"step": 31350
},
{
"epoch": 9.55,
"learning_rate": 1.8143074581430748e-06,
"loss": 0.7638,
"step": 31375
},
{
"epoch": 9.56,
"learning_rate": 1.7838660578386607e-06,
"loss": 0.7532,
"step": 31400
},
{
"epoch": 9.57,
"learning_rate": 1.7534246575342468e-06,
"loss": 0.7272,
"step": 31425
},
{
"epoch": 9.57,
"learning_rate": 1.7229832572298326e-06,
"loss": 0.7503,
"step": 31450
},
{
"epoch": 9.58,
"learning_rate": 1.6925418569254187e-06,
"loss": 0.7559,
"step": 31475
},
{
"epoch": 9.59,
"learning_rate": 1.6621004566210046e-06,
"loss": 0.7825,
"step": 31500
},
{
"epoch": 9.6,
"learning_rate": 1.6316590563165907e-06,
"loss": 0.7557,
"step": 31525
},
{
"epoch": 9.6,
"learning_rate": 1.6012176560121766e-06,
"loss": 0.7957,
"step": 31550
},
{
"epoch": 9.61,
"learning_rate": 1.5707762557077627e-06,
"loss": 0.7323,
"step": 31575
},
{
"epoch": 9.62,
"learning_rate": 1.5403348554033486e-06,
"loss": 0.7987,
"step": 31600
},
{
"epoch": 9.63,
"learning_rate": 1.5098934550989347e-06,
"loss": 0.8013,
"step": 31625
},
{
"epoch": 9.63,
"learning_rate": 1.4794520547945206e-06,
"loss": 0.7903,
"step": 31650
},
{
"epoch": 9.64,
"learning_rate": 1.4490106544901067e-06,
"loss": 0.7038,
"step": 31675
},
{
"epoch": 9.65,
"learning_rate": 1.4185692541856926e-06,
"loss": 0.7153,
"step": 31700
},
{
"epoch": 9.66,
"learning_rate": 1.3881278538812787e-06,
"loss": 0.7497,
"step": 31725
},
{
"epoch": 9.67,
"learning_rate": 1.3576864535768646e-06,
"loss": 0.7115,
"step": 31750
},
{
"epoch": 9.67,
"learning_rate": 1.3272450532724507e-06,
"loss": 0.7573,
"step": 31775
},
{
"epoch": 9.68,
"learning_rate": 1.2968036529680366e-06,
"loss": 0.756,
"step": 31800
},
{
"epoch": 9.69,
"learning_rate": 1.2663622526636227e-06,
"loss": 0.797,
"step": 31825
},
{
"epoch": 9.7,
"learning_rate": 1.2359208523592086e-06,
"loss": 0.7785,
"step": 31850
},
{
"epoch": 9.7,
"learning_rate": 1.2054794520547947e-06,
"loss": 0.75,
"step": 31875
},
{
"epoch": 9.71,
"learning_rate": 1.1750380517503806e-06,
"loss": 0.7955,
"step": 31900
},
{
"epoch": 9.72,
"learning_rate": 1.1445966514459667e-06,
"loss": 0.8163,
"step": 31925
},
{
"epoch": 9.73,
"learning_rate": 1.1141552511415526e-06,
"loss": 0.7569,
"step": 31950
},
{
"epoch": 9.73,
"learning_rate": 1.0837138508371387e-06,
"loss": 0.7812,
"step": 31975
},
{
"epoch": 9.74,
"learning_rate": 1.0532724505327246e-06,
"loss": 0.7108,
"step": 32000
},
{
"epoch": 9.75,
"learning_rate": 1.0228310502283107e-06,
"loss": 0.754,
"step": 32025
},
{
"epoch": 9.76,
"learning_rate": 9.923896499238966e-07,
"loss": 0.805,
"step": 32050
},
{
"epoch": 9.76,
"learning_rate": 9.619482496194827e-07,
"loss": 0.7494,
"step": 32075
},
{
"epoch": 9.77,
"learning_rate": 9.315068493150686e-07,
"loss": 0.7821,
"step": 32100
},
{
"epoch": 9.78,
"learning_rate": 9.010654490106546e-07,
"loss": 0.8192,
"step": 32125
},
{
"epoch": 9.79,
"learning_rate": 8.706240487062406e-07,
"loss": 0.7508,
"step": 32150
},
{
"epoch": 9.79,
"learning_rate": 8.401826484018266e-07,
"loss": 0.7274,
"step": 32175
},
{
"epoch": 9.8,
"learning_rate": 8.097412480974126e-07,
"loss": 0.786,
"step": 32200
},
{
"epoch": 9.81,
"learning_rate": 7.792998477929986e-07,
"loss": 0.7535,
"step": 32225
},
{
"epoch": 9.82,
"learning_rate": 7.488584474885845e-07,
"loss": 0.7448,
"step": 32250
},
{
"epoch": 9.82,
"learning_rate": 7.184170471841705e-07,
"loss": 0.7506,
"step": 32275
},
{
"epoch": 9.83,
"learning_rate": 6.879756468797565e-07,
"loss": 0.7662,
"step": 32300
},
{
"epoch": 9.84,
"learning_rate": 6.575342465753425e-07,
"loss": 0.7408,
"step": 32325
},
{
"epoch": 9.85,
"learning_rate": 6.270928462709285e-07,
"loss": 0.7333,
"step": 32350
},
{
"epoch": 9.86,
"learning_rate": 5.966514459665146e-07,
"loss": 0.7941,
"step": 32375
},
{
"epoch": 9.86,
"learning_rate": 5.662100456621006e-07,
"loss": 0.7735,
"step": 32400
},
{
"epoch": 9.87,
"learning_rate": 5.357686453576865e-07,
"loss": 0.7741,
"step": 32425
},
{
"epoch": 9.88,
"learning_rate": 5.053272450532725e-07,
"loss": 0.7857,
"step": 32450
},
{
"epoch": 9.89,
"learning_rate": 4.748858447488585e-07,
"loss": 0.8137,
"step": 32475
},
{
"epoch": 9.89,
"learning_rate": 4.444444444444445e-07,
"loss": 0.7557,
"step": 32500
},
{
"epoch": 9.9,
"learning_rate": 4.140030441400305e-07,
"loss": 0.7187,
"step": 32525
},
{
"epoch": 9.91,
"learning_rate": 3.835616438356165e-07,
"loss": 0.7536,
"step": 32550
},
{
"epoch": 9.92,
"learning_rate": 3.531202435312025e-07,
"loss": 0.7462,
"step": 32575
},
{
"epoch": 9.92,
"learning_rate": 3.226788432267885e-07,
"loss": 0.7967,
"step": 32600
},
{
"epoch": 9.93,
"learning_rate": 2.922374429223744e-07,
"loss": 0.7531,
"step": 32625
},
{
"epoch": 9.94,
"learning_rate": 2.617960426179604e-07,
"loss": 0.7584,
"step": 32650
},
{
"epoch": 9.95,
"learning_rate": 2.3135464231354645e-07,
"loss": 0.7664,
"step": 32675
},
{
"epoch": 9.95,
"learning_rate": 2.0091324200913244e-07,
"loss": 0.8058,
"step": 32700
},
{
"epoch": 9.96,
"learning_rate": 1.7047184170471844e-07,
"loss": 0.795,
"step": 32725
},
{
"epoch": 9.97,
"learning_rate": 1.4003044140030444e-07,
"loss": 0.7861,
"step": 32750
},
{
"epoch": 9.98,
"learning_rate": 1.0958904109589042e-07,
"loss": 0.7567,
"step": 32775
},
{
"epoch": 9.98,
"learning_rate": 7.914764079147642e-08,
"loss": 0.7481,
"step": 32800
},
{
"epoch": 9.99,
"learning_rate": 4.870624048706241e-08,
"loss": 0.7556,
"step": 32825
},
{
"epoch": 10.0,
"learning_rate": 1.9482496194824964e-08,
"loss": 0.7565,
"step": 32850
},
{
"epoch": 10.0,
"step": 32850,
"total_flos": 4.270496328921907e+17,
"train_loss": 1.3997784228157961,
"train_runtime": 9169.8515,
"train_samples_per_second": 21.49,
"train_steps_per_second": 3.582
}
],
"max_steps": 32850,
"num_train_epochs": 10,
"total_flos": 4.270496328921907e+17,
"trial_name": null,
"trial_params": null
}