t5-base-squad-visquad-aqg / trainer_state.json
longcld's picture
loss 1.1
5034058
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.074398830875515,
"global_step": 23000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.982285208148806e-06,
"loss": 1.2518,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 9.964570416297609e-06,
"loss": 1.2505,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 9.946855624446414e-06,
"loss": 1.2781,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 9.929140832595217e-06,
"loss": 1.2509,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 9.911426040744022e-06,
"loss": 1.2275,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 9.893711248892827e-06,
"loss": 1.2583,
"step": 600
},
{
"epoch": 0.12,
"learning_rate": 9.87599645704163e-06,
"loss": 1.2391,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 9.858281665190435e-06,
"loss": 1.2406,
"step": 800
},
{
"epoch": 0.16,
"learning_rate": 9.840566873339238e-06,
"loss": 1.2467,
"step": 900
},
{
"epoch": 0.18,
"learning_rate": 9.822852081488043e-06,
"loss": 1.251,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 9.805137289636848e-06,
"loss": 1.2033,
"step": 1100
},
{
"epoch": 0.21,
"learning_rate": 9.787422497785651e-06,
"loss": 1.2345,
"step": 1200
},
{
"epoch": 0.23,
"learning_rate": 9.769707705934456e-06,
"loss": 1.2463,
"step": 1300
},
{
"epoch": 0.25,
"learning_rate": 9.751992914083261e-06,
"loss": 1.237,
"step": 1400
},
{
"epoch": 0.27,
"learning_rate": 9.734278122232064e-06,
"loss": 1.2276,
"step": 1500
},
{
"epoch": 0.28,
"learning_rate": 9.71656333038087e-06,
"loss": 1.2274,
"step": 1600
},
{
"epoch": 0.3,
"learning_rate": 9.698848538529672e-06,
"loss": 1.2487,
"step": 1700
},
{
"epoch": 0.32,
"learning_rate": 9.681133746678477e-06,
"loss": 1.2168,
"step": 1800
},
{
"epoch": 0.34,
"learning_rate": 9.663418954827282e-06,
"loss": 1.2465,
"step": 1900
},
{
"epoch": 0.35,
"learning_rate": 9.645704162976086e-06,
"loss": 1.2375,
"step": 2000
},
{
"epoch": 0.37,
"learning_rate": 9.627989371124889e-06,
"loss": 1.2178,
"step": 2100
},
{
"epoch": 0.39,
"learning_rate": 9.610274579273694e-06,
"loss": 1.2438,
"step": 2200
},
{
"epoch": 0.41,
"learning_rate": 9.592559787422499e-06,
"loss": 1.2365,
"step": 2300
},
{
"epoch": 0.43,
"learning_rate": 9.574844995571303e-06,
"loss": 1.2508,
"step": 2400
},
{
"epoch": 0.44,
"learning_rate": 9.557130203720107e-06,
"loss": 1.2316,
"step": 2500
},
{
"epoch": 0.46,
"learning_rate": 9.539415411868912e-06,
"loss": 1.2448,
"step": 2600
},
{
"epoch": 0.48,
"learning_rate": 9.521700620017715e-06,
"loss": 1.2259,
"step": 2700
},
{
"epoch": 0.5,
"learning_rate": 9.50398582816652e-06,
"loss": 1.2091,
"step": 2800
},
{
"epoch": 0.51,
"learning_rate": 9.486271036315325e-06,
"loss": 1.2292,
"step": 2900
},
{
"epoch": 0.53,
"learning_rate": 9.46855624446413e-06,
"loss": 1.2013,
"step": 3000
},
{
"epoch": 0.55,
"learning_rate": 9.450841452612933e-06,
"loss": 1.2216,
"step": 3100
},
{
"epoch": 0.57,
"learning_rate": 9.433126660761736e-06,
"loss": 1.2353,
"step": 3200
},
{
"epoch": 0.58,
"learning_rate": 9.415411868910541e-06,
"loss": 1.2189,
"step": 3300
},
{
"epoch": 0.6,
"learning_rate": 9.397697077059346e-06,
"loss": 1.2103,
"step": 3400
},
{
"epoch": 0.62,
"learning_rate": 9.379982285208149e-06,
"loss": 1.2514,
"step": 3500
},
{
"epoch": 0.64,
"learning_rate": 9.362267493356954e-06,
"loss": 1.2227,
"step": 3600
},
{
"epoch": 0.66,
"learning_rate": 9.344552701505759e-06,
"loss": 1.2287,
"step": 3700
},
{
"epoch": 0.67,
"learning_rate": 9.326837909654562e-06,
"loss": 1.2374,
"step": 3800
},
{
"epoch": 0.69,
"learning_rate": 9.309123117803367e-06,
"loss": 1.2078,
"step": 3900
},
{
"epoch": 0.71,
"learning_rate": 9.29140832595217e-06,
"loss": 1.2001,
"step": 4000
},
{
"epoch": 0.73,
"learning_rate": 9.273693534100975e-06,
"loss": 1.2325,
"step": 4100
},
{
"epoch": 0.74,
"learning_rate": 9.25597874224978e-06,
"loss": 1.2311,
"step": 4200
},
{
"epoch": 0.76,
"learning_rate": 9.238263950398583e-06,
"loss": 1.2056,
"step": 4300
},
{
"epoch": 0.78,
"learning_rate": 9.220549158547388e-06,
"loss": 1.1852,
"step": 4400
},
{
"epoch": 0.8,
"learning_rate": 9.202834366696191e-06,
"loss": 1.2417,
"step": 4500
},
{
"epoch": 0.81,
"learning_rate": 9.185119574844996e-06,
"loss": 1.2329,
"step": 4600
},
{
"epoch": 0.83,
"learning_rate": 9.167404782993801e-06,
"loss": 1.2252,
"step": 4700
},
{
"epoch": 0.85,
"learning_rate": 9.149689991142604e-06,
"loss": 1.2052,
"step": 4800
},
{
"epoch": 0.87,
"learning_rate": 9.13197519929141e-06,
"loss": 1.2238,
"step": 4900
},
{
"epoch": 0.89,
"learning_rate": 9.114260407440213e-06,
"loss": 1.2166,
"step": 5000
},
{
"epoch": 0.9,
"learning_rate": 9.096545615589017e-06,
"loss": 1.2269,
"step": 5100
},
{
"epoch": 0.92,
"learning_rate": 9.078830823737822e-06,
"loss": 1.232,
"step": 5200
},
{
"epoch": 0.94,
"learning_rate": 9.061116031886627e-06,
"loss": 1.2372,
"step": 5300
},
{
"epoch": 0.96,
"learning_rate": 9.04340124003543e-06,
"loss": 1.2281,
"step": 5400
},
{
"epoch": 0.97,
"learning_rate": 9.025686448184234e-06,
"loss": 1.2031,
"step": 5500
},
{
"epoch": 0.99,
"learning_rate": 9.007971656333039e-06,
"loss": 1.1884,
"step": 5600
},
{
"epoch": 1.01,
"learning_rate": 8.990256864481844e-06,
"loss": 1.1951,
"step": 5700
},
{
"epoch": 1.03,
"learning_rate": 8.972542072630648e-06,
"loss": 1.2428,
"step": 5800
},
{
"epoch": 1.05,
"learning_rate": 8.954827280779452e-06,
"loss": 1.2197,
"step": 5900
},
{
"epoch": 1.06,
"learning_rate": 8.937112488928255e-06,
"loss": 1.1972,
"step": 6000
},
{
"epoch": 1.08,
"learning_rate": 8.91939769707706e-06,
"loss": 1.1989,
"step": 6100
},
{
"epoch": 1.1,
"learning_rate": 8.901682905225865e-06,
"loss": 1.1922,
"step": 6200
},
{
"epoch": 1.12,
"learning_rate": 8.883968113374668e-06,
"loss": 1.2149,
"step": 6300
},
{
"epoch": 1.13,
"learning_rate": 8.866253321523473e-06,
"loss": 1.1956,
"step": 6400
},
{
"epoch": 1.15,
"learning_rate": 8.848538529672278e-06,
"loss": 1.2049,
"step": 6500
},
{
"epoch": 1.17,
"learning_rate": 8.830823737821081e-06,
"loss": 1.2201,
"step": 6600
},
{
"epoch": 1.19,
"learning_rate": 8.813108945969886e-06,
"loss": 1.206,
"step": 6700
},
{
"epoch": 1.2,
"learning_rate": 8.795394154118689e-06,
"loss": 1.2079,
"step": 6800
},
{
"epoch": 1.22,
"learning_rate": 8.777679362267494e-06,
"loss": 1.2033,
"step": 6900
},
{
"epoch": 1.24,
"learning_rate": 8.759964570416299e-06,
"loss": 1.1853,
"step": 7000
},
{
"epoch": 1.26,
"learning_rate": 8.742249778565102e-06,
"loss": 1.1822,
"step": 7100
},
{
"epoch": 1.28,
"learning_rate": 8.724534986713907e-06,
"loss": 1.1834,
"step": 7200
},
{
"epoch": 1.29,
"learning_rate": 8.70682019486271e-06,
"loss": 1.2146,
"step": 7300
},
{
"epoch": 1.31,
"learning_rate": 8.689105403011515e-06,
"loss": 1.1983,
"step": 7400
},
{
"epoch": 1.33,
"learning_rate": 8.67139061116032e-06,
"loss": 1.2084,
"step": 7500
},
{
"epoch": 1.35,
"learning_rate": 8.653675819309125e-06,
"loss": 1.2055,
"step": 7600
},
{
"epoch": 1.36,
"learning_rate": 8.635961027457928e-06,
"loss": 1.2199,
"step": 7700
},
{
"epoch": 1.38,
"learning_rate": 8.618246235606731e-06,
"loss": 1.1745,
"step": 7800
},
{
"epoch": 1.4,
"learning_rate": 8.600531443755536e-06,
"loss": 1.195,
"step": 7900
},
{
"epoch": 1.42,
"learning_rate": 8.582816651904341e-06,
"loss": 1.2223,
"step": 8000
},
{
"epoch": 1.43,
"learning_rate": 8.565101860053146e-06,
"loss": 1.1919,
"step": 8100
},
{
"epoch": 1.45,
"learning_rate": 8.54738706820195e-06,
"loss": 1.2043,
"step": 8200
},
{
"epoch": 1.47,
"learning_rate": 8.529672276350753e-06,
"loss": 1.2102,
"step": 8300
},
{
"epoch": 1.49,
"learning_rate": 8.511957484499558e-06,
"loss": 1.1839,
"step": 8400
},
{
"epoch": 1.51,
"learning_rate": 8.494242692648362e-06,
"loss": 1.1889,
"step": 8500
},
{
"epoch": 1.52,
"learning_rate": 8.476527900797167e-06,
"loss": 1.1858,
"step": 8600
},
{
"epoch": 1.54,
"learning_rate": 8.45881310894597e-06,
"loss": 1.1706,
"step": 8700
},
{
"epoch": 1.56,
"learning_rate": 8.441098317094775e-06,
"loss": 1.2071,
"step": 8800
},
{
"epoch": 1.58,
"learning_rate": 8.423383525243579e-06,
"loss": 1.1671,
"step": 8900
},
{
"epoch": 1.59,
"learning_rate": 8.405668733392384e-06,
"loss": 1.1837,
"step": 9000
},
{
"epoch": 1.61,
"learning_rate": 8.387953941541187e-06,
"loss": 1.1627,
"step": 9100
},
{
"epoch": 1.63,
"learning_rate": 8.370239149689992e-06,
"loss": 1.2027,
"step": 9200
},
{
"epoch": 1.65,
"learning_rate": 8.352524357838797e-06,
"loss": 1.1831,
"step": 9300
},
{
"epoch": 1.67,
"learning_rate": 8.3348095659876e-06,
"loss": 1.1941,
"step": 9400
},
{
"epoch": 1.68,
"learning_rate": 8.317094774136405e-06,
"loss": 1.2047,
"step": 9500
},
{
"epoch": 1.7,
"learning_rate": 8.299379982285208e-06,
"loss": 1.2045,
"step": 9600
},
{
"epoch": 1.72,
"learning_rate": 8.281665190434013e-06,
"loss": 1.2022,
"step": 9700
},
{
"epoch": 1.74,
"learning_rate": 8.263950398582818e-06,
"loss": 1.1934,
"step": 9800
},
{
"epoch": 1.75,
"learning_rate": 8.246235606731621e-06,
"loss": 1.1672,
"step": 9900
},
{
"epoch": 1.77,
"learning_rate": 8.228520814880426e-06,
"loss": 1.1802,
"step": 10000
},
{
"epoch": 1.79,
"learning_rate": 8.210806023029229e-06,
"loss": 1.1933,
"step": 10100
},
{
"epoch": 1.81,
"learning_rate": 8.193091231178034e-06,
"loss": 1.205,
"step": 10200
},
{
"epoch": 1.82,
"learning_rate": 8.175376439326839e-06,
"loss": 1.1844,
"step": 10300
},
{
"epoch": 1.84,
"learning_rate": 8.157661647475644e-06,
"loss": 1.197,
"step": 10400
},
{
"epoch": 1.86,
"learning_rate": 8.139946855624447e-06,
"loss": 1.1978,
"step": 10500
},
{
"epoch": 1.88,
"learning_rate": 8.12223206377325e-06,
"loss": 1.2017,
"step": 10600
},
{
"epoch": 1.9,
"learning_rate": 8.104517271922055e-06,
"loss": 1.2067,
"step": 10700
},
{
"epoch": 1.91,
"learning_rate": 8.08680248007086e-06,
"loss": 1.2042,
"step": 10800
},
{
"epoch": 1.93,
"learning_rate": 8.069087688219665e-06,
"loss": 1.206,
"step": 10900
},
{
"epoch": 1.95,
"learning_rate": 8.051372896368468e-06,
"loss": 1.1987,
"step": 11000
},
{
"epoch": 1.97,
"learning_rate": 8.033658104517273e-06,
"loss": 1.1688,
"step": 11100
},
{
"epoch": 1.98,
"learning_rate": 8.015943312666076e-06,
"loss": 1.1723,
"step": 11200
},
{
"epoch": 2.0,
"learning_rate": 7.998228520814881e-06,
"loss": 1.1676,
"step": 11300
},
{
"epoch": 2.02,
"learning_rate": 7.980513728963686e-06,
"loss": 1.1638,
"step": 11400
},
{
"epoch": 2.04,
"learning_rate": 7.96279893711249e-06,
"loss": 1.2048,
"step": 11500
},
{
"epoch": 2.05,
"learning_rate": 7.945084145261294e-06,
"loss": 1.1705,
"step": 11600
},
{
"epoch": 2.07,
"learning_rate": 7.927369353410098e-06,
"loss": 1.1734,
"step": 11700
},
{
"epoch": 2.09,
"learning_rate": 7.909654561558902e-06,
"loss": 1.1781,
"step": 11800
},
{
"epoch": 2.11,
"learning_rate": 7.891939769707706e-06,
"loss": 1.1313,
"step": 11900
},
{
"epoch": 2.13,
"learning_rate": 7.87422497785651e-06,
"loss": 1.1673,
"step": 12000
},
{
"epoch": 2.14,
"learning_rate": 7.856510186005316e-06,
"loss": 1.1926,
"step": 12100
},
{
"epoch": 2.16,
"learning_rate": 7.838795394154119e-06,
"loss": 1.1832,
"step": 12200
},
{
"epoch": 2.18,
"learning_rate": 7.821080602302924e-06,
"loss": 1.1921,
"step": 12300
},
{
"epoch": 2.2,
"learning_rate": 7.803365810451727e-06,
"loss": 1.1869,
"step": 12400
},
{
"epoch": 2.21,
"learning_rate": 7.785651018600532e-06,
"loss": 1.1816,
"step": 12500
},
{
"epoch": 2.23,
"learning_rate": 7.767936226749337e-06,
"loss": 1.1784,
"step": 12600
},
{
"epoch": 2.25,
"learning_rate": 7.750221434898142e-06,
"loss": 1.1809,
"step": 12700
},
{
"epoch": 2.27,
"learning_rate": 7.732506643046945e-06,
"loss": 1.1713,
"step": 12800
},
{
"epoch": 2.29,
"learning_rate": 7.714791851195748e-06,
"loss": 1.178,
"step": 12900
},
{
"epoch": 2.3,
"learning_rate": 7.697077059344553e-06,
"loss": 1.1915,
"step": 13000
},
{
"epoch": 2.32,
"learning_rate": 7.679362267493358e-06,
"loss": 1.1814,
"step": 13100
},
{
"epoch": 2.34,
"learning_rate": 7.661647475642163e-06,
"loss": 1.1598,
"step": 13200
},
{
"epoch": 2.36,
"learning_rate": 7.643932683790966e-06,
"loss": 1.192,
"step": 13300
},
{
"epoch": 2.37,
"learning_rate": 7.62621789193977e-06,
"loss": 1.1932,
"step": 13400
},
{
"epoch": 2.39,
"learning_rate": 7.608503100088574e-06,
"loss": 1.1586,
"step": 13500
},
{
"epoch": 2.41,
"learning_rate": 7.590788308237379e-06,
"loss": 1.1614,
"step": 13600
},
{
"epoch": 2.43,
"learning_rate": 7.573073516386183e-06,
"loss": 1.1527,
"step": 13700
},
{
"epoch": 2.44,
"learning_rate": 7.555358724534987e-06,
"loss": 1.181,
"step": 13800
},
{
"epoch": 2.46,
"learning_rate": 7.537643932683791e-06,
"loss": 1.1306,
"step": 13900
},
{
"epoch": 2.48,
"learning_rate": 7.519929140832596e-06,
"loss": 1.1611,
"step": 14000
},
{
"epoch": 2.5,
"learning_rate": 7.5022143489814e-06,
"loss": 1.1703,
"step": 14100
},
{
"epoch": 2.52,
"learning_rate": 7.484499557130205e-06,
"loss": 1.1642,
"step": 14200
},
{
"epoch": 2.53,
"learning_rate": 7.466784765279008e-06,
"loss": 1.1285,
"step": 14300
},
{
"epoch": 2.55,
"learning_rate": 7.449069973427812e-06,
"loss": 1.1627,
"step": 14400
},
{
"epoch": 2.57,
"learning_rate": 7.431355181576617e-06,
"loss": 1.1667,
"step": 14500
},
{
"epoch": 2.59,
"learning_rate": 7.413640389725421e-06,
"loss": 1.162,
"step": 14600
},
{
"epoch": 2.6,
"learning_rate": 7.3959255978742254e-06,
"loss": 1.1835,
"step": 14700
},
{
"epoch": 2.62,
"learning_rate": 7.3782108060230295e-06,
"loss": 1.1697,
"step": 14800
},
{
"epoch": 2.64,
"learning_rate": 7.3604960141718344e-06,
"loss": 1.18,
"step": 14900
},
{
"epoch": 2.66,
"learning_rate": 7.3427812223206385e-06,
"loss": 1.1594,
"step": 15000
},
{
"epoch": 2.67,
"learning_rate": 7.325066430469443e-06,
"loss": 1.165,
"step": 15100
},
{
"epoch": 2.69,
"learning_rate": 7.307351638618247e-06,
"loss": 1.1598,
"step": 15200
},
{
"epoch": 2.71,
"learning_rate": 7.289636846767051e-06,
"loss": 1.1412,
"step": 15300
},
{
"epoch": 2.73,
"learning_rate": 7.271922054915856e-06,
"loss": 1.1745,
"step": 15400
},
{
"epoch": 2.75,
"learning_rate": 7.25420726306466e-06,
"loss": 1.1961,
"step": 15500
},
{
"epoch": 2.76,
"learning_rate": 7.2364924712134646e-06,
"loss": 1.1557,
"step": 15600
},
{
"epoch": 2.78,
"learning_rate": 7.218777679362268e-06,
"loss": 1.171,
"step": 15700
},
{
"epoch": 2.8,
"learning_rate": 7.201062887511072e-06,
"loss": 1.146,
"step": 15800
},
{
"epoch": 2.82,
"learning_rate": 7.183348095659877e-06,
"loss": 1.157,
"step": 15900
},
{
"epoch": 2.83,
"learning_rate": 7.165633303808681e-06,
"loss": 1.1622,
"step": 16000
},
{
"epoch": 2.85,
"learning_rate": 7.147918511957485e-06,
"loss": 1.1661,
"step": 16100
},
{
"epoch": 2.87,
"learning_rate": 7.130203720106289e-06,
"loss": 1.1694,
"step": 16200
},
{
"epoch": 2.89,
"learning_rate": 7.112488928255094e-06,
"loss": 1.1689,
"step": 16300
},
{
"epoch": 2.91,
"learning_rate": 7.094774136403898e-06,
"loss": 1.1394,
"step": 16400
},
{
"epoch": 2.92,
"learning_rate": 7.077059344552703e-06,
"loss": 1.137,
"step": 16500
},
{
"epoch": 2.94,
"learning_rate": 7.059344552701506e-06,
"loss": 1.1639,
"step": 16600
},
{
"epoch": 2.96,
"learning_rate": 7.04162976085031e-06,
"loss": 1.1545,
"step": 16700
},
{
"epoch": 2.98,
"learning_rate": 7.023914968999115e-06,
"loss": 1.1742,
"step": 16800
},
{
"epoch": 2.99,
"learning_rate": 7.006200177147919e-06,
"loss": 1.161,
"step": 16900
},
{
"epoch": 3.01,
"learning_rate": 6.988485385296724e-06,
"loss": 1.1506,
"step": 17000
},
{
"epoch": 3.03,
"learning_rate": 6.970770593445527e-06,
"loss": 1.154,
"step": 17100
},
{
"epoch": 3.05,
"learning_rate": 6.953055801594331e-06,
"loss": 1.1414,
"step": 17200
},
{
"epoch": 3.06,
"learning_rate": 6.935341009743136e-06,
"loss": 1.1289,
"step": 17300
},
{
"epoch": 3.08,
"learning_rate": 6.91762621789194e-06,
"loss": 1.1664,
"step": 17400
},
{
"epoch": 3.1,
"learning_rate": 6.899911426040744e-06,
"loss": 1.1751,
"step": 17500
},
{
"epoch": 3.12,
"learning_rate": 6.882196634189548e-06,
"loss": 1.1514,
"step": 17600
},
{
"epoch": 3.14,
"learning_rate": 6.864481842338353e-06,
"loss": 1.1219,
"step": 17700
},
{
"epoch": 3.15,
"learning_rate": 6.846767050487157e-06,
"loss": 1.1343,
"step": 17800
},
{
"epoch": 3.17,
"learning_rate": 6.829052258635962e-06,
"loss": 1.1209,
"step": 17900
},
{
"epoch": 3.19,
"learning_rate": 6.8113374667847655e-06,
"loss": 1.1612,
"step": 18000
},
{
"epoch": 3.21,
"learning_rate": 6.79362267493357e-06,
"loss": 1.1427,
"step": 18100
},
{
"epoch": 3.22,
"learning_rate": 6.7759078830823745e-06,
"loss": 1.1621,
"step": 18200
},
{
"epoch": 3.24,
"learning_rate": 6.7581930912311786e-06,
"loss": 1.1434,
"step": 18300
},
{
"epoch": 3.26,
"learning_rate": 6.7404782993799835e-06,
"loss": 1.1324,
"step": 18400
},
{
"epoch": 3.28,
"learning_rate": 6.722763507528787e-06,
"loss": 1.1566,
"step": 18500
},
{
"epoch": 3.29,
"learning_rate": 6.705048715677592e-06,
"loss": 1.1393,
"step": 18600
},
{
"epoch": 3.31,
"learning_rate": 6.687333923826396e-06,
"loss": 1.1642,
"step": 18700
},
{
"epoch": 3.33,
"learning_rate": 6.6696191319752006e-06,
"loss": 1.1419,
"step": 18800
},
{
"epoch": 3.35,
"learning_rate": 6.651904340124004e-06,
"loss": 1.1293,
"step": 18900
},
{
"epoch": 3.37,
"learning_rate": 6.634189548272808e-06,
"loss": 1.1467,
"step": 19000
},
{
"epoch": 3.38,
"learning_rate": 6.616474756421613e-06,
"loss": 1.1619,
"step": 19100
},
{
"epoch": 3.4,
"learning_rate": 6.598759964570417e-06,
"loss": 1.1566,
"step": 19200
},
{
"epoch": 3.42,
"learning_rate": 6.581045172719222e-06,
"loss": 1.1741,
"step": 19300
},
{
"epoch": 3.44,
"learning_rate": 6.563330380868025e-06,
"loss": 1.1763,
"step": 19400
},
{
"epoch": 3.45,
"learning_rate": 6.545615589016829e-06,
"loss": 1.1628,
"step": 19500
},
{
"epoch": 3.47,
"learning_rate": 6.527900797165634e-06,
"loss": 1.1585,
"step": 19600
},
{
"epoch": 3.49,
"learning_rate": 6.510186005314438e-06,
"loss": 1.1546,
"step": 19700
},
{
"epoch": 3.51,
"learning_rate": 6.492471213463243e-06,
"loss": 1.1684,
"step": 19800
},
{
"epoch": 3.53,
"learning_rate": 6.474756421612046e-06,
"loss": 1.1274,
"step": 19900
},
{
"epoch": 3.54,
"learning_rate": 6.457041629760851e-06,
"loss": 1.1432,
"step": 20000
},
{
"epoch": 3.56,
"learning_rate": 6.439326837909655e-06,
"loss": 1.1391,
"step": 20100
},
{
"epoch": 3.58,
"learning_rate": 6.42161204605846e-06,
"loss": 1.154,
"step": 20200
},
{
"epoch": 3.6,
"learning_rate": 6.403897254207263e-06,
"loss": 1.128,
"step": 20300
},
{
"epoch": 3.61,
"learning_rate": 6.386182462356067e-06,
"loss": 1.1623,
"step": 20400
},
{
"epoch": 3.63,
"learning_rate": 6.368467670504872e-06,
"loss": 1.1661,
"step": 20500
},
{
"epoch": 3.65,
"learning_rate": 6.350752878653676e-06,
"loss": 1.1328,
"step": 20600
},
{
"epoch": 3.67,
"learning_rate": 6.333038086802481e-06,
"loss": 1.1457,
"step": 20700
},
{
"epoch": 3.68,
"learning_rate": 6.315323294951284e-06,
"loss": 1.1347,
"step": 20800
},
{
"epoch": 3.7,
"learning_rate": 6.2976085031000885e-06,
"loss": 1.1434,
"step": 20900
},
{
"epoch": 3.72,
"learning_rate": 6.279893711248893e-06,
"loss": 1.1342,
"step": 21000
},
{
"epoch": 3.74,
"learning_rate": 6.2621789193976975e-06,
"loss": 1.1377,
"step": 21100
},
{
"epoch": 3.76,
"learning_rate": 6.244464127546502e-06,
"loss": 1.1321,
"step": 21200
},
{
"epoch": 3.77,
"learning_rate": 6.226749335695306e-06,
"loss": 1.138,
"step": 21300
},
{
"epoch": 3.79,
"learning_rate": 6.2090345438441105e-06,
"loss": 1.1458,
"step": 21400
},
{
"epoch": 3.81,
"learning_rate": 6.1913197519929146e-06,
"loss": 1.1134,
"step": 21500
},
{
"epoch": 3.83,
"learning_rate": 6.1736049601417195e-06,
"loss": 1.1179,
"step": 21600
},
{
"epoch": 3.84,
"learning_rate": 6.155890168290523e-06,
"loss": 1.1396,
"step": 21700
},
{
"epoch": 3.86,
"learning_rate": 6.138175376439327e-06,
"loss": 1.122,
"step": 21800
},
{
"epoch": 3.88,
"learning_rate": 6.120460584588132e-06,
"loss": 1.1576,
"step": 21900
},
{
"epoch": 3.9,
"learning_rate": 6.102745792736936e-06,
"loss": 1.1513,
"step": 22000
},
{
"epoch": 3.91,
"learning_rate": 6.085031000885741e-06,
"loss": 1.14,
"step": 22100
},
{
"epoch": 3.93,
"learning_rate": 6.067316209034544e-06,
"loss": 1.1657,
"step": 22200
},
{
"epoch": 3.95,
"learning_rate": 6.049601417183349e-06,
"loss": 1.1452,
"step": 22300
},
{
"epoch": 3.97,
"learning_rate": 6.031886625332153e-06,
"loss": 1.1379,
"step": 22400
},
{
"epoch": 3.99,
"learning_rate": 6.014171833480958e-06,
"loss": 1.1167,
"step": 22500
},
{
"epoch": 4.0,
"learning_rate": 5.996457041629762e-06,
"loss": 1.1221,
"step": 22600
},
{
"epoch": 4.02,
"learning_rate": 5.978742249778565e-06,
"loss": 1.1274,
"step": 22700
},
{
"epoch": 4.04,
"learning_rate": 5.96102745792737e-06,
"loss": 1.1267,
"step": 22800
},
{
"epoch": 4.06,
"learning_rate": 5.943312666076174e-06,
"loss": 1.1295,
"step": 22900
},
{
"epoch": 4.07,
"learning_rate": 5.925597874224979e-06,
"loss": 1.1073,
"step": 23000
}
],
"max_steps": 56450,
"num_train_epochs": 10,
"total_flos": 4.338858046644173e+17,
"trial_name": null,
"trial_params": null
}