whisper-th-large-v3 / trainer_state.json
tensorops's picture
add model
42685a6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8229775327133569,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.0453611334320685e-06,
"loss": 0.3087,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 6.229195710491767e-06,
"loss": 0.3064,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 6.903829450223392e-06,
"loss": 0.3032,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 7.377725845391017e-06,
"loss": 0.2897,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.743343231239583e-06,
"loss": 0.2752,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 8.041073861170494e-06,
"loss": 0.2695,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 8.292222957399574e-06,
"loss": 0.2657,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 8.509413541357755e-06,
"loss": 0.2567,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 8.700744577655557e-06,
"loss": 0.2824,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 8.871723942761204e-06,
"loss": 0.266,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 9.026267958246849e-06,
"loss": 0.2634,
"step": 275
},
{
"epoch": 0.02,
"learning_rate": 9.16726106663399e-06,
"loss": 0.2691,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 9.296889251455016e-06,
"loss": 0.256,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 9.416848797368692e-06,
"loss": 0.2534,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 9.528482449516371e-06,
"loss": 0.2719,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.632871309784314e-06,
"loss": 0.2646,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.73089868785391e-06,
"loss": 0.2508,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 9.823295589572114e-06,
"loss": 0.2521,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.910673836465484e-06,
"loss": 0.2381,
"step": 475
},
{
"epoch": 0.04,
"learning_rate": 9.993550644973805e-06,
"loss": 0.2728,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.976842105263158e-06,
"loss": 0.2246,
"step": 525
},
{
"epoch": 0.05,
"learning_rate": 9.950526315789475e-06,
"loss": 0.2373,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 9.92421052631579e-06,
"loss": 0.2511,
"step": 575
},
{
"epoch": 0.05,
"learning_rate": 9.897894736842107e-06,
"loss": 0.246,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.871578947368422e-06,
"loss": 0.2273,
"step": 625
},
{
"epoch": 0.05,
"learning_rate": 9.845263157894738e-06,
"loss": 0.2509,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 9.818947368421053e-06,
"loss": 0.2126,
"step": 675
},
{
"epoch": 0.06,
"learning_rate": 9.79263157894737e-06,
"loss": 0.2007,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.766315789473685e-06,
"loss": 0.248,
"step": 725
},
{
"epoch": 0.06,
"learning_rate": 9.74e-06,
"loss": 0.2357,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.713684210526317e-06,
"loss": 0.2349,
"step": 775
},
{
"epoch": 0.07,
"learning_rate": 9.687368421052632e-06,
"loss": 0.2388,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 9.661052631578948e-06,
"loss": 0.2527,
"step": 825
},
{
"epoch": 0.07,
"learning_rate": 9.634736842105265e-06,
"loss": 0.2283,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.60842105263158e-06,
"loss": 0.2347,
"step": 875
},
{
"epoch": 0.07,
"learning_rate": 9.582105263157897e-06,
"loss": 0.2423,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 9.555789473684211e-06,
"loss": 0.2306,
"step": 925
},
{
"epoch": 0.08,
"learning_rate": 9.529473684210528e-06,
"loss": 0.2452,
"step": 950
},
{
"epoch": 0.08,
"learning_rate": 9.503157894736843e-06,
"loss": 0.2157,
"step": 975
},
{
"epoch": 0.08,
"learning_rate": 9.476842105263158e-06,
"loss": 0.2229,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.452631578947368e-06,
"loss": 0.2176,
"step": 1025
},
{
"epoch": 0.09,
"learning_rate": 9.426315789473685e-06,
"loss": 0.2204,
"step": 1050
},
{
"epoch": 0.09,
"learning_rate": 9.4e-06,
"loss": 0.2197,
"step": 1075
},
{
"epoch": 0.09,
"learning_rate": 9.373684210526316e-06,
"loss": 0.2192,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 9.347368421052633e-06,
"loss": 0.2084,
"step": 1125
},
{
"epoch": 0.09,
"learning_rate": 9.321052631578948e-06,
"loss": 0.2202,
"step": 1150
},
{
"epoch": 0.1,
"learning_rate": 9.294736842105265e-06,
"loss": 0.2066,
"step": 1175
},
{
"epoch": 0.1,
"learning_rate": 9.26842105263158e-06,
"loss": 0.1891,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 9.242105263157896e-06,
"loss": 0.2137,
"step": 1225
},
{
"epoch": 0.1,
"learning_rate": 9.215789473684211e-06,
"loss": 0.2114,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.189473684210526e-06,
"loss": 0.2036,
"step": 1275
},
{
"epoch": 0.11,
"learning_rate": 9.163157894736843e-06,
"loss": 0.2245,
"step": 1300
},
{
"epoch": 0.11,
"learning_rate": 9.136842105263158e-06,
"loss": 0.206,
"step": 1325
},
{
"epoch": 0.11,
"learning_rate": 9.110526315789475e-06,
"loss": 0.2267,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 9.08421052631579e-06,
"loss": 0.2308,
"step": 1375
},
{
"epoch": 0.12,
"learning_rate": 9.057894736842106e-06,
"loss": 0.1975,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 9.031578947368423e-06,
"loss": 0.206,
"step": 1425
},
{
"epoch": 0.12,
"learning_rate": 9.005263157894738e-06,
"loss": 0.1878,
"step": 1450
},
{
"epoch": 0.12,
"learning_rate": 8.978947368421055e-06,
"loss": 0.2008,
"step": 1475
},
{
"epoch": 0.12,
"learning_rate": 8.95263157894737e-06,
"loss": 0.2034,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 8.926315789473685e-06,
"loss": 0.2075,
"step": 1525
},
{
"epoch": 0.13,
"learning_rate": 8.900000000000001e-06,
"loss": 0.2086,
"step": 1550
},
{
"epoch": 0.13,
"learning_rate": 8.873684210526316e-06,
"loss": 0.1993,
"step": 1575
},
{
"epoch": 0.13,
"learning_rate": 8.847368421052633e-06,
"loss": 0.204,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 8.821052631578948e-06,
"loss": 0.1998,
"step": 1625
},
{
"epoch": 0.14,
"learning_rate": 8.794736842105264e-06,
"loss": 0.222,
"step": 1650
},
{
"epoch": 0.14,
"learning_rate": 8.76842105263158e-06,
"loss": 0.2151,
"step": 1675
},
{
"epoch": 0.14,
"learning_rate": 8.742105263157894e-06,
"loss": 0.1854,
"step": 1700
},
{
"epoch": 0.14,
"learning_rate": 8.715789473684211e-06,
"loss": 0.2148,
"step": 1725
},
{
"epoch": 0.14,
"learning_rate": 8.689473684210526e-06,
"loss": 0.1965,
"step": 1750
},
{
"epoch": 0.15,
"learning_rate": 8.663157894736843e-06,
"loss": 0.216,
"step": 1775
},
{
"epoch": 0.15,
"learning_rate": 8.63684210526316e-06,
"loss": 0.2021,
"step": 1800
},
{
"epoch": 0.15,
"learning_rate": 8.610526315789474e-06,
"loss": 0.1922,
"step": 1825
},
{
"epoch": 0.15,
"learning_rate": 8.584210526315791e-06,
"loss": 0.2068,
"step": 1850
},
{
"epoch": 0.15,
"learning_rate": 8.557894736842106e-06,
"loss": 0.2174,
"step": 1875
},
{
"epoch": 0.16,
"learning_rate": 8.531578947368423e-06,
"loss": 0.1834,
"step": 1900
},
{
"epoch": 0.16,
"learning_rate": 8.505263157894738e-06,
"loss": 0.1714,
"step": 1925
},
{
"epoch": 0.16,
"learning_rate": 8.478947368421053e-06,
"loss": 0.2106,
"step": 1950
},
{
"epoch": 0.16,
"learning_rate": 8.45263157894737e-06,
"loss": 0.2222,
"step": 1975
},
{
"epoch": 0.16,
"learning_rate": 8.426315789473684e-06,
"loss": 0.1911,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 8.402105263157896e-06,
"loss": 0.1918,
"step": 2025
},
{
"epoch": 0.17,
"learning_rate": 8.375789473684211e-06,
"loss": 0.1831,
"step": 2050
},
{
"epoch": 0.17,
"learning_rate": 8.349473684210528e-06,
"loss": 0.1808,
"step": 2075
},
{
"epoch": 0.17,
"learning_rate": 8.323157894736843e-06,
"loss": 0.1833,
"step": 2100
},
{
"epoch": 0.17,
"learning_rate": 8.29684210526316e-06,
"loss": 0.1929,
"step": 2125
},
{
"epoch": 0.18,
"learning_rate": 8.270526315789474e-06,
"loss": 0.1846,
"step": 2150
},
{
"epoch": 0.18,
"learning_rate": 8.244210526315791e-06,
"loss": 0.1921,
"step": 2175
},
{
"epoch": 0.18,
"learning_rate": 8.217894736842106e-06,
"loss": 0.1928,
"step": 2200
},
{
"epoch": 0.18,
"learning_rate": 8.19157894736842e-06,
"loss": 0.1808,
"step": 2225
},
{
"epoch": 0.19,
"learning_rate": 8.165263157894737e-06,
"loss": 0.191,
"step": 2250
},
{
"epoch": 0.19,
"learning_rate": 8.138947368421052e-06,
"loss": 0.1832,
"step": 2275
},
{
"epoch": 0.19,
"learning_rate": 8.112631578947369e-06,
"loss": 0.2098,
"step": 2300
},
{
"epoch": 0.19,
"learning_rate": 8.086315789473684e-06,
"loss": 0.2125,
"step": 2325
},
{
"epoch": 0.19,
"learning_rate": 8.06e-06,
"loss": 0.1671,
"step": 2350
},
{
"epoch": 0.2,
"learning_rate": 8.033684210526317e-06,
"loss": 0.185,
"step": 2375
},
{
"epoch": 0.2,
"learning_rate": 8.007368421052632e-06,
"loss": 0.1963,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 7.981052631578949e-06,
"loss": 0.1847,
"step": 2425
},
{
"epoch": 0.2,
"learning_rate": 7.954736842105264e-06,
"loss": 0.1862,
"step": 2450
},
{
"epoch": 0.2,
"learning_rate": 7.928421052631579e-06,
"loss": 0.1555,
"step": 2475
},
{
"epoch": 0.21,
"learning_rate": 7.902105263157896e-06,
"loss": 0.1682,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 7.87578947368421e-06,
"loss": 0.1705,
"step": 2525
},
{
"epoch": 0.21,
"learning_rate": 7.849473684210527e-06,
"loss": 0.1983,
"step": 2550
},
{
"epoch": 0.21,
"learning_rate": 7.823157894736842e-06,
"loss": 0.1762,
"step": 2575
},
{
"epoch": 0.21,
"learning_rate": 7.796842105263159e-06,
"loss": 0.1835,
"step": 2600
},
{
"epoch": 0.22,
"learning_rate": 7.770526315789474e-06,
"loss": 0.1887,
"step": 2625
},
{
"epoch": 0.22,
"learning_rate": 7.744210526315789e-06,
"loss": 0.1724,
"step": 2650
},
{
"epoch": 0.22,
"learning_rate": 7.717894736842107e-06,
"loss": 0.1937,
"step": 2675
},
{
"epoch": 0.22,
"learning_rate": 7.691578947368422e-06,
"loss": 0.1886,
"step": 2700
},
{
"epoch": 0.22,
"learning_rate": 7.665263157894737e-06,
"loss": 0.2126,
"step": 2725
},
{
"epoch": 0.23,
"learning_rate": 7.638947368421054e-06,
"loss": 0.2043,
"step": 2750
},
{
"epoch": 0.23,
"learning_rate": 7.61263157894737e-06,
"loss": 0.1705,
"step": 2775
},
{
"epoch": 0.23,
"learning_rate": 7.586315789473685e-06,
"loss": 0.1581,
"step": 2800
},
{
"epoch": 0.23,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.1699,
"step": 2825
},
{
"epoch": 0.23,
"learning_rate": 7.533684210526316e-06,
"loss": 0.1801,
"step": 2850
},
{
"epoch": 0.24,
"learning_rate": 7.507368421052632e-06,
"loss": 0.1732,
"step": 2875
},
{
"epoch": 0.24,
"learning_rate": 7.481052631578948e-06,
"loss": 0.1846,
"step": 2900
},
{
"epoch": 0.24,
"learning_rate": 7.454736842105264e-06,
"loss": 0.1832,
"step": 2925
},
{
"epoch": 0.24,
"learning_rate": 7.4284210526315796e-06,
"loss": 0.1581,
"step": 2950
},
{
"epoch": 0.24,
"learning_rate": 7.4021052631578945e-06,
"loss": 0.1744,
"step": 2975
},
{
"epoch": 0.25,
"learning_rate": 7.37578947368421e-06,
"loss": 0.1718,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 7.351578947368422e-06,
"loss": 0.181,
"step": 3025
},
{
"epoch": 0.25,
"learning_rate": 7.325263157894738e-06,
"loss": 0.1563,
"step": 3050
},
{
"epoch": 0.25,
"learning_rate": 7.298947368421053e-06,
"loss": 0.1821,
"step": 3075
},
{
"epoch": 0.26,
"learning_rate": 7.272631578947369e-06,
"loss": 0.1635,
"step": 3100
},
{
"epoch": 0.26,
"learning_rate": 7.2463157894736845e-06,
"loss": 0.1922,
"step": 3125
},
{
"epoch": 0.26,
"learning_rate": 7.22e-06,
"loss": 0.1771,
"step": 3150
},
{
"epoch": 0.26,
"learning_rate": 7.193684210526316e-06,
"loss": 0.1721,
"step": 3175
},
{
"epoch": 0.26,
"learning_rate": 7.167368421052632e-06,
"loss": 0.1518,
"step": 3200
},
{
"epoch": 0.27,
"learning_rate": 7.141052631578948e-06,
"loss": 0.1621,
"step": 3225
},
{
"epoch": 0.27,
"learning_rate": 7.1147368421052645e-06,
"loss": 0.1771,
"step": 3250
},
{
"epoch": 0.27,
"learning_rate": 7.08842105263158e-06,
"loss": 0.1703,
"step": 3275
},
{
"epoch": 0.27,
"learning_rate": 7.062105263157896e-06,
"loss": 0.1775,
"step": 3300
},
{
"epoch": 0.27,
"learning_rate": 7.035789473684211e-06,
"loss": 0.1826,
"step": 3325
},
{
"epoch": 0.28,
"learning_rate": 7.009473684210527e-06,
"loss": 0.156,
"step": 3350
},
{
"epoch": 0.28,
"learning_rate": 6.983157894736843e-06,
"loss": 0.1743,
"step": 3375
},
{
"epoch": 0.28,
"learning_rate": 6.9568421052631585e-06,
"loss": 0.1821,
"step": 3400
},
{
"epoch": 0.28,
"learning_rate": 6.930526315789474e-06,
"loss": 0.1784,
"step": 3425
},
{
"epoch": 0.28,
"learning_rate": 6.90421052631579e-06,
"loss": 0.1634,
"step": 3450
},
{
"epoch": 0.29,
"learning_rate": 6.877894736842106e-06,
"loss": 0.1824,
"step": 3475
},
{
"epoch": 0.29,
"learning_rate": 6.851578947368421e-06,
"loss": 0.1852,
"step": 3500
},
{
"epoch": 0.29,
"learning_rate": 6.825263157894737e-06,
"loss": 0.1851,
"step": 3525
},
{
"epoch": 0.29,
"learning_rate": 6.798947368421053e-06,
"loss": 0.1754,
"step": 3550
},
{
"epoch": 0.29,
"learning_rate": 6.772631578947368e-06,
"loss": 0.1805,
"step": 3575
},
{
"epoch": 0.3,
"learning_rate": 6.746315789473685e-06,
"loss": 0.1773,
"step": 3600
},
{
"epoch": 0.3,
"learning_rate": 6.720000000000001e-06,
"loss": 0.167,
"step": 3625
},
{
"epoch": 0.3,
"learning_rate": 6.693684210526317e-06,
"loss": 0.1657,
"step": 3650
},
{
"epoch": 0.3,
"learning_rate": 6.6673684210526325e-06,
"loss": 0.2064,
"step": 3675
},
{
"epoch": 0.3,
"learning_rate": 6.641052631578948e-06,
"loss": 0.1741,
"step": 3700
},
{
"epoch": 0.31,
"learning_rate": 6.614736842105264e-06,
"loss": 0.1899,
"step": 3725
},
{
"epoch": 0.31,
"learning_rate": 6.588421052631579e-06,
"loss": 0.1531,
"step": 3750
},
{
"epoch": 0.31,
"learning_rate": 6.562105263157895e-06,
"loss": 0.1637,
"step": 3775
},
{
"epoch": 0.31,
"learning_rate": 6.535789473684211e-06,
"loss": 0.1386,
"step": 3800
},
{
"epoch": 0.31,
"learning_rate": 6.509473684210527e-06,
"loss": 0.1668,
"step": 3825
},
{
"epoch": 0.32,
"learning_rate": 6.483157894736842e-06,
"loss": 0.166,
"step": 3850
},
{
"epoch": 0.32,
"learning_rate": 6.456842105263158e-06,
"loss": 0.1653,
"step": 3875
},
{
"epoch": 0.32,
"learning_rate": 6.430526315789474e-06,
"loss": 0.161,
"step": 3900
},
{
"epoch": 0.32,
"learning_rate": 6.404210526315791e-06,
"loss": 0.172,
"step": 3925
},
{
"epoch": 0.33,
"learning_rate": 6.3778947368421065e-06,
"loss": 0.1625,
"step": 3950
},
{
"epoch": 0.33,
"learning_rate": 6.351578947368422e-06,
"loss": 0.1595,
"step": 3975
},
{
"epoch": 0.33,
"learning_rate": 6.325263157894737e-06,
"loss": 0.1513,
"step": 4000
},
{
"epoch": 0.33,
"learning_rate": 6.301052631578947e-06,
"loss": 0.1762,
"step": 4025
},
{
"epoch": 0.33,
"learning_rate": 6.274736842105263e-06,
"loss": 0.1448,
"step": 4050
},
{
"epoch": 0.34,
"learning_rate": 6.248421052631579e-06,
"loss": 0.1773,
"step": 4075
},
{
"epoch": 0.34,
"learning_rate": 6.222105263157895e-06,
"loss": 0.1507,
"step": 4100
},
{
"epoch": 0.34,
"learning_rate": 6.195789473684211e-06,
"loss": 0.1508,
"step": 4125
},
{
"epoch": 0.34,
"learning_rate": 6.1694736842105265e-06,
"loss": 0.1587,
"step": 4150
},
{
"epoch": 0.34,
"learning_rate": 6.143157894736843e-06,
"loss": 0.1707,
"step": 4175
},
{
"epoch": 0.35,
"learning_rate": 6.116842105263159e-06,
"loss": 0.1648,
"step": 4200
},
{
"epoch": 0.35,
"learning_rate": 6.090526315789475e-06,
"loss": 0.165,
"step": 4225
},
{
"epoch": 0.35,
"learning_rate": 6.0642105263157906e-06,
"loss": 0.1892,
"step": 4250
},
{
"epoch": 0.35,
"learning_rate": 6.0378947368421055e-06,
"loss": 0.1698,
"step": 4275
},
{
"epoch": 0.35,
"learning_rate": 6.011578947368421e-06,
"loss": 0.1692,
"step": 4300
},
{
"epoch": 0.36,
"learning_rate": 5.985263157894737e-06,
"loss": 0.1504,
"step": 4325
},
{
"epoch": 0.36,
"learning_rate": 5.958947368421053e-06,
"loss": 0.1435,
"step": 4350
},
{
"epoch": 0.36,
"learning_rate": 5.932631578947369e-06,
"loss": 0.1532,
"step": 4375
},
{
"epoch": 0.36,
"learning_rate": 5.906315789473685e-06,
"loss": 0.1575,
"step": 4400
},
{
"epoch": 0.36,
"learning_rate": 5.8800000000000005e-06,
"loss": 0.163,
"step": 4425
},
{
"epoch": 0.37,
"learning_rate": 5.853684210526316e-06,
"loss": 0.1524,
"step": 4450
},
{
"epoch": 0.37,
"learning_rate": 5.827368421052631e-06,
"loss": 0.157,
"step": 4475
},
{
"epoch": 0.37,
"learning_rate": 5.801052631578949e-06,
"loss": 0.1593,
"step": 4500
},
{
"epoch": 0.37,
"learning_rate": 5.774736842105264e-06,
"loss": 0.1696,
"step": 4525
},
{
"epoch": 0.37,
"learning_rate": 5.7484210526315795e-06,
"loss": 0.1472,
"step": 4550
},
{
"epoch": 0.38,
"learning_rate": 5.722105263157895e-06,
"loss": 0.1534,
"step": 4575
},
{
"epoch": 0.38,
"learning_rate": 5.695789473684211e-06,
"loss": 0.1552,
"step": 4600
},
{
"epoch": 0.38,
"learning_rate": 5.669473684210527e-06,
"loss": 0.1661,
"step": 4625
},
{
"epoch": 0.38,
"learning_rate": 5.643157894736843e-06,
"loss": 0.1526,
"step": 4650
},
{
"epoch": 0.38,
"learning_rate": 5.616842105263159e-06,
"loss": 0.1454,
"step": 4675
},
{
"epoch": 0.39,
"learning_rate": 5.590526315789474e-06,
"loss": 0.1505,
"step": 4700
},
{
"epoch": 0.39,
"learning_rate": 5.5642105263157894e-06,
"loss": 0.1593,
"step": 4725
},
{
"epoch": 0.39,
"learning_rate": 5.537894736842105e-06,
"loss": 0.1653,
"step": 4750
},
{
"epoch": 0.39,
"learning_rate": 5.511578947368421e-06,
"loss": 0.1298,
"step": 4775
},
{
"epoch": 0.4,
"learning_rate": 5.485263157894737e-06,
"loss": 0.157,
"step": 4800
},
{
"epoch": 0.4,
"learning_rate": 5.458947368421053e-06,
"loss": 0.1549,
"step": 4825
},
{
"epoch": 0.4,
"learning_rate": 5.432631578947369e-06,
"loss": 0.1516,
"step": 4850
},
{
"epoch": 0.4,
"learning_rate": 5.406315789473685e-06,
"loss": 0.1511,
"step": 4875
},
{
"epoch": 0.4,
"learning_rate": 5.380000000000001e-06,
"loss": 0.136,
"step": 4900
},
{
"epoch": 0.41,
"learning_rate": 5.353684210526317e-06,
"loss": 0.1558,
"step": 4925
},
{
"epoch": 0.41,
"learning_rate": 5.327368421052632e-06,
"loss": 0.1546,
"step": 4950
},
{
"epoch": 0.41,
"learning_rate": 5.301052631578948e-06,
"loss": 0.1524,
"step": 4975
},
{
"epoch": 0.41,
"learning_rate": 5.2747368421052634e-06,
"loss": 0.1531,
"step": 5000
},
{
"epoch": 0.41,
"learning_rate": 5.2505263157894735e-06,
"loss": 0.1541,
"step": 5025
},
{
"epoch": 0.42,
"learning_rate": 5.224210526315789e-06,
"loss": 0.1515,
"step": 5050
},
{
"epoch": 0.42,
"learning_rate": 5.197894736842106e-06,
"loss": 0.1483,
"step": 5075
},
{
"epoch": 0.42,
"learning_rate": 5.171578947368422e-06,
"loss": 0.1495,
"step": 5100
},
{
"epoch": 0.42,
"learning_rate": 5.145263157894738e-06,
"loss": 0.1593,
"step": 5125
},
{
"epoch": 0.42,
"learning_rate": 5.118947368421053e-06,
"loss": 0.1507,
"step": 5150
},
{
"epoch": 0.43,
"learning_rate": 5.092631578947369e-06,
"loss": 0.1458,
"step": 5175
},
{
"epoch": 0.43,
"learning_rate": 5.066315789473685e-06,
"loss": 0.1426,
"step": 5200
},
{
"epoch": 0.43,
"learning_rate": 5.04e-06,
"loss": 0.1422,
"step": 5225
},
{
"epoch": 0.43,
"learning_rate": 5.013684210526316e-06,
"loss": 0.1586,
"step": 5250
},
{
"epoch": 0.43,
"learning_rate": 4.987368421052632e-06,
"loss": 0.1416,
"step": 5275
},
{
"epoch": 0.44,
"learning_rate": 4.9610526315789475e-06,
"loss": 0.1757,
"step": 5300
},
{
"epoch": 0.44,
"learning_rate": 4.934736842105264e-06,
"loss": 0.1338,
"step": 5325
},
{
"epoch": 0.44,
"learning_rate": 4.908421052631579e-06,
"loss": 0.1545,
"step": 5350
},
{
"epoch": 0.44,
"learning_rate": 4.882105263157895e-06,
"loss": 0.1494,
"step": 5375
},
{
"epoch": 0.44,
"learning_rate": 4.855789473684211e-06,
"loss": 0.1627,
"step": 5400
},
{
"epoch": 0.45,
"learning_rate": 4.8294736842105266e-06,
"loss": 0.1554,
"step": 5425
},
{
"epoch": 0.45,
"learning_rate": 4.803157894736842e-06,
"loss": 0.1487,
"step": 5450
},
{
"epoch": 0.45,
"learning_rate": 4.776842105263158e-06,
"loss": 0.1472,
"step": 5475
},
{
"epoch": 0.45,
"learning_rate": 4.750526315789474e-06,
"loss": 0.1345,
"step": 5500
},
{
"epoch": 0.45,
"learning_rate": 4.72421052631579e-06,
"loss": 0.1513,
"step": 5525
},
{
"epoch": 0.46,
"learning_rate": 4.697894736842106e-06,
"loss": 0.1376,
"step": 5550
},
{
"epoch": 0.46,
"learning_rate": 4.6715789473684215e-06,
"loss": 0.1554,
"step": 5575
},
{
"epoch": 0.46,
"learning_rate": 4.645263157894737e-06,
"loss": 0.1475,
"step": 5600
},
{
"epoch": 0.46,
"learning_rate": 4.618947368421053e-06,
"loss": 0.1449,
"step": 5625
},
{
"epoch": 0.46,
"learning_rate": 4.592631578947369e-06,
"loss": 0.128,
"step": 5650
},
{
"epoch": 0.47,
"learning_rate": 4.566315789473685e-06,
"loss": 0.1386,
"step": 5675
},
{
"epoch": 0.47,
"learning_rate": 4.540000000000001e-06,
"loss": 0.1701,
"step": 5700
},
{
"epoch": 0.47,
"learning_rate": 4.513684210526316e-06,
"loss": 0.1492,
"step": 5725
},
{
"epoch": 0.47,
"learning_rate": 4.487368421052632e-06,
"loss": 0.1476,
"step": 5750
},
{
"epoch": 0.48,
"learning_rate": 4.461052631578948e-06,
"loss": 0.146,
"step": 5775
},
{
"epoch": 0.48,
"learning_rate": 4.434736842105263e-06,
"loss": 0.1399,
"step": 5800
},
{
"epoch": 0.48,
"learning_rate": 4.408421052631579e-06,
"loss": 0.1462,
"step": 5825
},
{
"epoch": 0.48,
"learning_rate": 4.3821052631578955e-06,
"loss": 0.1369,
"step": 5850
},
{
"epoch": 0.48,
"learning_rate": 4.355789473684211e-06,
"loss": 0.1576,
"step": 5875
},
{
"epoch": 0.49,
"learning_rate": 4.329473684210527e-06,
"loss": 0.1639,
"step": 5900
},
{
"epoch": 0.49,
"learning_rate": 4.303157894736842e-06,
"loss": 0.1439,
"step": 5925
},
{
"epoch": 0.49,
"learning_rate": 4.276842105263158e-06,
"loss": 0.1548,
"step": 5950
},
{
"epoch": 0.49,
"learning_rate": 4.250526315789474e-06,
"loss": 0.1377,
"step": 5975
},
{
"epoch": 0.49,
"learning_rate": 4.22421052631579e-06,
"loss": 0.1455,
"step": 6000
},
{
"epoch": 0.5,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.1581,
"step": 6025
},
{
"epoch": 0.5,
"learning_rate": 4.173684210526316e-06,
"loss": 0.1463,
"step": 6050
},
{
"epoch": 0.5,
"learning_rate": 4.147368421052632e-06,
"loss": 0.1524,
"step": 6075
},
{
"epoch": 0.5,
"learning_rate": 4.121052631578948e-06,
"loss": 0.1262,
"step": 6100
},
{
"epoch": 0.5,
"learning_rate": 4.094736842105264e-06,
"loss": 0.1492,
"step": 6125
},
{
"epoch": 0.51,
"learning_rate": 4.0684210526315795e-06,
"loss": 0.1378,
"step": 6150
},
{
"epoch": 0.51,
"learning_rate": 4.042105263157895e-06,
"loss": 0.1508,
"step": 6175
},
{
"epoch": 0.51,
"learning_rate": 4.01578947368421e-06,
"loss": 0.1329,
"step": 6200
},
{
"epoch": 0.51,
"learning_rate": 3.989473684210526e-06,
"loss": 0.1342,
"step": 6225
},
{
"epoch": 0.51,
"learning_rate": 3.963157894736843e-06,
"loss": 0.1336,
"step": 6250
},
{
"epoch": 0.52,
"learning_rate": 3.936842105263159e-06,
"loss": 0.1259,
"step": 6275
},
{
"epoch": 0.52,
"learning_rate": 3.9105263157894744e-06,
"loss": 0.1402,
"step": 6300
},
{
"epoch": 0.52,
"learning_rate": 3.884210526315789e-06,
"loss": 0.1361,
"step": 6325
},
{
"epoch": 0.52,
"learning_rate": 3.857894736842105e-06,
"loss": 0.149,
"step": 6350
},
{
"epoch": 0.52,
"learning_rate": 3.831578947368421e-06,
"loss": 0.1471,
"step": 6375
},
{
"epoch": 0.53,
"learning_rate": 3.805263157894737e-06,
"loss": 0.1264,
"step": 6400
},
{
"epoch": 0.53,
"learning_rate": 3.778947368421053e-06,
"loss": 0.1329,
"step": 6425
},
{
"epoch": 0.53,
"learning_rate": 3.752631578947369e-06,
"loss": 0.1643,
"step": 6450
},
{
"epoch": 0.53,
"learning_rate": 3.7263157894736848e-06,
"loss": 0.1489,
"step": 6475
},
{
"epoch": 0.53,
"learning_rate": 3.7e-06,
"loss": 0.1234,
"step": 6500
},
{
"epoch": 0.54,
"learning_rate": 3.673684210526316e-06,
"loss": 0.1318,
"step": 6525
},
{
"epoch": 0.54,
"learning_rate": 3.6473684210526318e-06,
"loss": 0.1481,
"step": 6550
},
{
"epoch": 0.54,
"learning_rate": 3.621052631578948e-06,
"loss": 0.1454,
"step": 6575
},
{
"epoch": 0.54,
"learning_rate": 3.5947368421052634e-06,
"loss": 0.1493,
"step": 6600
},
{
"epoch": 0.55,
"learning_rate": 3.5684210526315792e-06,
"loss": 0.1371,
"step": 6625
},
{
"epoch": 0.55,
"learning_rate": 3.542105263157895e-06,
"loss": 0.1435,
"step": 6650
},
{
"epoch": 0.55,
"learning_rate": 3.515789473684211e-06,
"loss": 0.1475,
"step": 6675
},
{
"epoch": 0.55,
"learning_rate": 3.4894736842105263e-06,
"loss": 0.1509,
"step": 6700
},
{
"epoch": 0.55,
"learning_rate": 3.463157894736842e-06,
"loss": 0.1512,
"step": 6725
},
{
"epoch": 0.56,
"learning_rate": 3.4368421052631583e-06,
"loss": 0.1376,
"step": 6750
},
{
"epoch": 0.56,
"learning_rate": 3.410526315789474e-06,
"loss": 0.1333,
"step": 6775
},
{
"epoch": 0.56,
"learning_rate": 3.38421052631579e-06,
"loss": 0.1355,
"step": 6800
},
{
"epoch": 0.56,
"learning_rate": 3.3578947368421054e-06,
"loss": 0.1577,
"step": 6825
},
{
"epoch": 0.56,
"learning_rate": 3.331578947368421e-06,
"loss": 0.1315,
"step": 6850
},
{
"epoch": 0.57,
"learning_rate": 3.305263157894737e-06,
"loss": 0.1328,
"step": 6875
},
{
"epoch": 0.57,
"learning_rate": 3.278947368421053e-06,
"loss": 0.1445,
"step": 6900
},
{
"epoch": 0.57,
"learning_rate": 3.252631578947369e-06,
"loss": 0.1373,
"step": 6925
},
{
"epoch": 0.57,
"learning_rate": 3.2263157894736845e-06,
"loss": 0.1347,
"step": 6950
},
{
"epoch": 0.57,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.1323,
"step": 6975
},
{
"epoch": 0.58,
"learning_rate": 3.173684210526316e-06,
"loss": 0.1406,
"step": 7000
},
{
"epoch": 0.58,
"learning_rate": 3.1494736842105266e-06,
"loss": 0.1225,
"step": 7025
},
{
"epoch": 0.58,
"learning_rate": 3.1231578947368424e-06,
"loss": 0.1459,
"step": 7050
},
{
"epoch": 0.58,
"learning_rate": 3.096842105263158e-06,
"loss": 0.1315,
"step": 7075
},
{
"epoch": 0.58,
"learning_rate": 3.0705263157894736e-06,
"loss": 0.1233,
"step": 7100
},
{
"epoch": 0.59,
"learning_rate": 3.0442105263157894e-06,
"loss": 0.1328,
"step": 7125
},
{
"epoch": 0.59,
"learning_rate": 3.0178947368421057e-06,
"loss": 0.1374,
"step": 7150
},
{
"epoch": 0.59,
"learning_rate": 2.9915789473684215e-06,
"loss": 0.1323,
"step": 7175
},
{
"epoch": 0.59,
"learning_rate": 2.9652631578947373e-06,
"loss": 0.1576,
"step": 7200
},
{
"epoch": 0.59,
"learning_rate": 2.9389473684210527e-06,
"loss": 0.1365,
"step": 7225
},
{
"epoch": 0.6,
"learning_rate": 2.9126315789473685e-06,
"loss": 0.1519,
"step": 7250
},
{
"epoch": 0.6,
"learning_rate": 2.8863157894736843e-06,
"loss": 0.1446,
"step": 7275
},
{
"epoch": 0.6,
"learning_rate": 2.86e-06,
"loss": 0.1529,
"step": 7300
},
{
"epoch": 0.6,
"learning_rate": 2.8336842105263164e-06,
"loss": 0.1426,
"step": 7325
},
{
"epoch": 0.6,
"learning_rate": 2.8073684210526318e-06,
"loss": 0.1352,
"step": 7350
},
{
"epoch": 0.61,
"learning_rate": 2.7810526315789476e-06,
"loss": 0.1391,
"step": 7375
},
{
"epoch": 0.61,
"learning_rate": 2.7547368421052634e-06,
"loss": 0.1369,
"step": 7400
},
{
"epoch": 0.61,
"learning_rate": 2.7284210526315792e-06,
"loss": 0.1361,
"step": 7425
},
{
"epoch": 0.61,
"learning_rate": 2.7021052631578946e-06,
"loss": 0.1319,
"step": 7450
},
{
"epoch": 0.62,
"learning_rate": 2.6757894736842105e-06,
"loss": 0.1236,
"step": 7475
},
{
"epoch": 0.62,
"learning_rate": 2.6494736842105267e-06,
"loss": 0.1349,
"step": 7500
},
{
"epoch": 0.62,
"learning_rate": 2.6231578947368425e-06,
"loss": 0.1438,
"step": 7525
},
{
"epoch": 0.62,
"learning_rate": 2.5968421052631583e-06,
"loss": 0.1203,
"step": 7550
},
{
"epoch": 0.62,
"learning_rate": 2.5705263157894737e-06,
"loss": 0.1335,
"step": 7575
},
{
"epoch": 0.63,
"learning_rate": 2.5442105263157895e-06,
"loss": 0.1609,
"step": 7600
},
{
"epoch": 0.63,
"learning_rate": 2.5178947368421054e-06,
"loss": 0.1199,
"step": 7625
},
{
"epoch": 0.63,
"learning_rate": 2.491578947368421e-06,
"loss": 0.135,
"step": 7650
},
{
"epoch": 0.63,
"learning_rate": 2.465263157894737e-06,
"loss": 0.1355,
"step": 7675
},
{
"epoch": 0.63,
"learning_rate": 2.438947368421053e-06,
"loss": 0.1429,
"step": 7700
},
{
"epoch": 0.64,
"learning_rate": 2.4126315789473686e-06,
"loss": 0.1199,
"step": 7725
},
{
"epoch": 0.64,
"learning_rate": 2.3863157894736845e-06,
"loss": 0.1185,
"step": 7750
},
{
"epoch": 0.64,
"learning_rate": 2.3600000000000003e-06,
"loss": 0.158,
"step": 7775
},
{
"epoch": 0.64,
"learning_rate": 2.333684210526316e-06,
"loss": 0.1367,
"step": 7800
},
{
"epoch": 0.64,
"learning_rate": 2.307368421052632e-06,
"loss": 0.1454,
"step": 7825
},
{
"epoch": 0.65,
"learning_rate": 2.2810526315789473e-06,
"loss": 0.1239,
"step": 7850
},
{
"epoch": 0.65,
"learning_rate": 2.2547368421052635e-06,
"loss": 0.116,
"step": 7875
},
{
"epoch": 0.65,
"learning_rate": 2.228421052631579e-06,
"loss": 0.1256,
"step": 7900
},
{
"epoch": 0.65,
"learning_rate": 2.2021052631578948e-06,
"loss": 0.1233,
"step": 7925
},
{
"epoch": 0.65,
"learning_rate": 2.175789473684211e-06,
"loss": 0.1435,
"step": 7950
},
{
"epoch": 0.66,
"learning_rate": 2.1494736842105264e-06,
"loss": 0.126,
"step": 7975
},
{
"epoch": 0.66,
"learning_rate": 2.1231578947368422e-06,
"loss": 0.1343,
"step": 8000
},
{
"epoch": 0.66,
"learning_rate": 2.098947368421053e-06,
"loss": 0.1192,
"step": 8025
},
{
"epoch": 0.66,
"learning_rate": 2.0726315789473685e-06,
"loss": 0.1582,
"step": 8050
},
{
"epoch": 0.66,
"learning_rate": 2.0463157894736843e-06,
"loss": 0.1336,
"step": 8075
},
{
"epoch": 0.67,
"learning_rate": 2.02e-06,
"loss": 0.133,
"step": 8100
},
{
"epoch": 0.67,
"learning_rate": 1.993684210526316e-06,
"loss": 0.1092,
"step": 8125
},
{
"epoch": 0.67,
"learning_rate": 1.9673684210526318e-06,
"loss": 0.1518,
"step": 8150
},
{
"epoch": 0.67,
"learning_rate": 1.9410526315789476e-06,
"loss": 0.1223,
"step": 8175
},
{
"epoch": 0.67,
"learning_rate": 1.9147368421052634e-06,
"loss": 0.138,
"step": 8200
},
{
"epoch": 0.68,
"learning_rate": 1.888421052631579e-06,
"loss": 0.1348,
"step": 8225
},
{
"epoch": 0.68,
"learning_rate": 1.8621052631578948e-06,
"loss": 0.1182,
"step": 8250
},
{
"epoch": 0.68,
"learning_rate": 1.8357894736842109e-06,
"loss": 0.1305,
"step": 8275
},
{
"epoch": 0.68,
"learning_rate": 1.8094736842105265e-06,
"loss": 0.1389,
"step": 8300
},
{
"epoch": 0.69,
"learning_rate": 1.7831578947368423e-06,
"loss": 0.1275,
"step": 8325
},
{
"epoch": 0.69,
"learning_rate": 1.756842105263158e-06,
"loss": 0.1443,
"step": 8350
},
{
"epoch": 0.69,
"learning_rate": 1.730526315789474e-06,
"loss": 0.1134,
"step": 8375
},
{
"epoch": 0.69,
"learning_rate": 1.7042105263157895e-06,
"loss": 0.1122,
"step": 8400
},
{
"epoch": 0.69,
"learning_rate": 1.6778947368421054e-06,
"loss": 0.1451,
"step": 8425
},
{
"epoch": 0.7,
"learning_rate": 1.6515789473684212e-06,
"loss": 0.1204,
"step": 8450
},
{
"epoch": 0.7,
"learning_rate": 1.625263157894737e-06,
"loss": 0.121,
"step": 8475
},
{
"epoch": 0.7,
"learning_rate": 1.5989473684210526e-06,
"loss": 0.1289,
"step": 8500
},
{
"epoch": 0.7,
"learning_rate": 1.5726315789473686e-06,
"loss": 0.1388,
"step": 8525
},
{
"epoch": 0.7,
"learning_rate": 1.5463157894736845e-06,
"loss": 0.1316,
"step": 8550
},
{
"epoch": 0.71,
"learning_rate": 1.52e-06,
"loss": 0.1396,
"step": 8575
},
{
"epoch": 0.71,
"learning_rate": 1.4936842105263159e-06,
"loss": 0.1206,
"step": 8600
},
{
"epoch": 0.71,
"learning_rate": 1.4673684210526317e-06,
"loss": 0.1362,
"step": 8625
},
{
"epoch": 0.71,
"learning_rate": 1.4410526315789475e-06,
"loss": 0.1213,
"step": 8650
},
{
"epoch": 0.71,
"learning_rate": 1.4147368421052631e-06,
"loss": 0.1398,
"step": 8675
},
{
"epoch": 0.72,
"learning_rate": 1.3884210526315792e-06,
"loss": 0.1446,
"step": 8700
},
{
"epoch": 0.72,
"learning_rate": 1.362105263157895e-06,
"loss": 0.1098,
"step": 8725
},
{
"epoch": 0.72,
"learning_rate": 1.3357894736842106e-06,
"loss": 0.1352,
"step": 8750
},
{
"epoch": 0.72,
"learning_rate": 1.3094736842105262e-06,
"loss": 0.1295,
"step": 8775
},
{
"epoch": 0.72,
"learning_rate": 1.2831578947368422e-06,
"loss": 0.1309,
"step": 8800
},
{
"epoch": 0.73,
"learning_rate": 1.256842105263158e-06,
"loss": 0.1492,
"step": 8825
},
{
"epoch": 0.73,
"learning_rate": 1.2305263157894739e-06,
"loss": 0.1489,
"step": 8850
},
{
"epoch": 0.73,
"learning_rate": 1.2042105263157895e-06,
"loss": 0.1187,
"step": 8875
},
{
"epoch": 0.73,
"learning_rate": 1.1778947368421053e-06,
"loss": 0.144,
"step": 8900
},
{
"epoch": 0.73,
"learning_rate": 1.1515789473684213e-06,
"loss": 0.1201,
"step": 8925
},
{
"epoch": 0.74,
"learning_rate": 1.125263157894737e-06,
"loss": 0.11,
"step": 8950
},
{
"epoch": 0.74,
"learning_rate": 1.0989473684210527e-06,
"loss": 0.1476,
"step": 8975
},
{
"epoch": 0.74,
"learning_rate": 1.0726315789473685e-06,
"loss": 0.1462,
"step": 9000
},
{
"epoch": 0.74,
"learning_rate": 1.048421052631579e-06,
"loss": 0.1503,
"step": 9025
},
{
"epoch": 0.74,
"learning_rate": 1.0221052631578948e-06,
"loss": 0.1338,
"step": 9050
},
{
"epoch": 0.75,
"learning_rate": 9.957894736842107e-07,
"loss": 0.1339,
"step": 9075
},
{
"epoch": 0.75,
"learning_rate": 9.694736842105265e-07,
"loss": 0.1322,
"step": 9100
},
{
"epoch": 0.75,
"learning_rate": 9.431578947368422e-07,
"loss": 0.1316,
"step": 9125
},
{
"epoch": 0.75,
"learning_rate": 9.168421052631579e-07,
"loss": 0.1234,
"step": 9150
},
{
"epoch": 0.76,
"learning_rate": 8.905263157894737e-07,
"loss": 0.1297,
"step": 9175
},
{
"epoch": 0.76,
"learning_rate": 8.642105263157896e-07,
"loss": 0.1218,
"step": 9200
},
{
"epoch": 0.76,
"learning_rate": 8.378947368421054e-07,
"loss": 0.124,
"step": 9225
},
{
"epoch": 0.76,
"learning_rate": 8.115789473684212e-07,
"loss": 0.1271,
"step": 9250
},
{
"epoch": 0.76,
"learning_rate": 7.852631578947369e-07,
"loss": 0.1196,
"step": 9275
},
{
"epoch": 0.77,
"learning_rate": 7.589473684210527e-07,
"loss": 0.1296,
"step": 9300
},
{
"epoch": 0.77,
"learning_rate": 7.326315789473684e-07,
"loss": 0.1182,
"step": 9325
},
{
"epoch": 0.77,
"learning_rate": 7.063157894736842e-07,
"loss": 0.1213,
"step": 9350
},
{
"epoch": 0.77,
"learning_rate": 6.800000000000001e-07,
"loss": 0.1302,
"step": 9375
},
{
"epoch": 0.77,
"learning_rate": 6.536842105263158e-07,
"loss": 0.133,
"step": 9400
},
{
"epoch": 0.78,
"learning_rate": 6.273684210526317e-07,
"loss": 0.1202,
"step": 9425
},
{
"epoch": 0.78,
"learning_rate": 6.010526315789474e-07,
"loss": 0.1293,
"step": 9450
},
{
"epoch": 0.78,
"learning_rate": 5.747368421052632e-07,
"loss": 0.1416,
"step": 9475
},
{
"epoch": 0.78,
"learning_rate": 5.484210526315789e-07,
"loss": 0.1398,
"step": 9500
},
{
"epoch": 0.78,
"learning_rate": 5.221052631578948e-07,
"loss": 0.1314,
"step": 9525
},
{
"epoch": 0.79,
"learning_rate": 4.957894736842106e-07,
"loss": 0.1301,
"step": 9550
},
{
"epoch": 0.79,
"learning_rate": 4.694736842105264e-07,
"loss": 0.1242,
"step": 9575
},
{
"epoch": 0.79,
"learning_rate": 4.4315789473684216e-07,
"loss": 0.1194,
"step": 9600
},
{
"epoch": 0.79,
"learning_rate": 4.168421052631579e-07,
"loss": 0.1313,
"step": 9625
},
{
"epoch": 0.79,
"learning_rate": 3.905263157894737e-07,
"loss": 0.1379,
"step": 9650
},
{
"epoch": 0.8,
"learning_rate": 3.6421052631578945e-07,
"loss": 0.1469,
"step": 9675
},
{
"epoch": 0.8,
"learning_rate": 3.378947368421053e-07,
"loss": 0.1472,
"step": 9700
},
{
"epoch": 0.8,
"learning_rate": 3.115789473684211e-07,
"loss": 0.1312,
"step": 9725
},
{
"epoch": 0.8,
"learning_rate": 2.8526315789473686e-07,
"loss": 0.1152,
"step": 9750
},
{
"epoch": 0.8,
"learning_rate": 2.589473684210526e-07,
"loss": 0.1024,
"step": 9775
},
{
"epoch": 0.81,
"learning_rate": 2.3263157894736844e-07,
"loss": 0.1194,
"step": 9800
},
{
"epoch": 0.81,
"learning_rate": 2.0631578947368423e-07,
"loss": 0.1304,
"step": 9825
},
{
"epoch": 0.81,
"learning_rate": 1.8e-07,
"loss": 0.1289,
"step": 9850
},
{
"epoch": 0.81,
"learning_rate": 1.536842105263158e-07,
"loss": 0.1305,
"step": 9875
},
{
"epoch": 0.81,
"learning_rate": 1.2736842105263158e-07,
"loss": 0.1182,
"step": 9900
},
{
"epoch": 0.82,
"learning_rate": 1.0105263157894737e-07,
"loss": 0.11,
"step": 9925
},
{
"epoch": 0.82,
"learning_rate": 7.473684210526317e-08,
"loss": 0.1196,
"step": 9950
},
{
"epoch": 0.82,
"learning_rate": 4.842105263157895e-08,
"loss": 0.1286,
"step": 9975
},
{
"epoch": 0.82,
"learning_rate": 2.2105263157894736e-08,
"loss": 0.1367,
"step": 10000
},
{
"epoch": 0.82,
"step": 10000,
"total_flos": 5.4359970325966356e+20,
"train_loss": 0.16558935852050782,
"train_runtime": 67809.1634,
"train_samples_per_second": 2.36,
"train_steps_per_second": 0.147
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2000,
"total_flos": 5.4359970325966356e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}