Go4miii's picture
commit from root
c92e6c3
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4087,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.999926141863722e-05,
"loss": 1.0318,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.999704571818908e-05,
"loss": 0.8189,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.999335302957357e-05,
"loss": 0.6632,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 4.9988183570978795e-05,
"loss": 0.5815,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.998153764784998e-05,
"loss": 0.5221,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.9973415652871555e-05,
"loss": 0.4635,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 4.9963818065943846e-05,
"loss": 0.4266,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 4.9952745454154736e-05,
"loss": 0.4073,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 4.9940198471746226e-05,
"loss": 0.373,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 4.9926177860075696e-05,
"loss": 0.3172,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 4.991068444757214e-05,
"loss": 0.2985,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 4.989371914968722e-05,
"loss": 0.2965,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 4.987528296884116e-05,
"loss": 0.29,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 4.985537699436353e-05,
"loss": 0.2725,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 4.983400240242887e-05,
"loss": 0.236,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 4.9811160455987196e-05,
"loss": 0.2672,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 4.978685250468939e-05,
"loss": 0.2448,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 4.9761079984807426e-05,
"loss": 0.2553,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 4.973384441914953e-05,
"loss": 0.2133,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 4.970514741697021e-05,
"loss": 0.2351,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 4.967499067387514e-05,
"loss": 0.1805,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 4.964337597172099e-05,
"loss": 0.2253,
"step": 220
},
{
"epoch": 0.06,
"learning_rate": 4.961030517851014e-05,
"loss": 0.1853,
"step": 230
},
{
"epoch": 0.06,
"learning_rate": 4.957578024828033e-05,
"loss": 0.2271,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 4.953980322098914e-05,
"loss": 0.2112,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 4.9502376222393525e-05,
"loss": 0.22,
"step": 260
},
{
"epoch": 0.07,
"learning_rate": 4.9463501463924175e-05,
"loss": 0.1825,
"step": 270
},
{
"epoch": 0.07,
"learning_rate": 4.942318124255487e-05,
"loss": 0.242,
"step": 280
},
{
"epoch": 0.07,
"learning_rate": 4.9381417940666715e-05,
"loss": 0.2179,
"step": 290
},
{
"epoch": 0.07,
"learning_rate": 4.9338214025907424e-05,
"loss": 0.183,
"step": 300
},
{
"epoch": 0.08,
"learning_rate": 4.9293572051045514e-05,
"loss": 0.1886,
"step": 310
},
{
"epoch": 0.08,
"learning_rate": 4.924749465381942e-05,
"loss": 0.2062,
"step": 320
},
{
"epoch": 0.08,
"learning_rate": 4.91999845567817e-05,
"loss": 0.1647,
"step": 330
},
{
"epoch": 0.08,
"learning_rate": 4.915104456713812e-05,
"loss": 0.1828,
"step": 340
},
{
"epoch": 0.09,
"learning_rate": 4.910067757658182e-05,
"loss": 0.1781,
"step": 350
},
{
"epoch": 0.09,
"learning_rate": 4.904888656112246e-05,
"loss": 0.2016,
"step": 360
},
{
"epoch": 0.09,
"learning_rate": 4.899567458091031e-05,
"loss": 0.1761,
"step": 370
},
{
"epoch": 0.09,
"learning_rate": 4.894104478005554e-05,
"loss": 0.1729,
"step": 380
},
{
"epoch": 0.1,
"learning_rate": 4.888500038644238e-05,
"loss": 0.1894,
"step": 390
},
{
"epoch": 0.1,
"learning_rate": 4.8827544711538386e-05,
"loss": 0.2072,
"step": 400
},
{
"epoch": 0.1,
"learning_rate": 4.876868115019881e-05,
"loss": 0.1724,
"step": 410
},
{
"epoch": 0.1,
"learning_rate": 4.8708413180466e-05,
"loss": 0.2249,
"step": 420
},
{
"epoch": 0.11,
"learning_rate": 4.8646744363363904e-05,
"loss": 0.2007,
"step": 430
},
{
"epoch": 0.11,
"learning_rate": 4.858367834268763e-05,
"loss": 0.1877,
"step": 440
},
{
"epoch": 0.11,
"learning_rate": 4.851921884478817e-05,
"loss": 0.1827,
"step": 450
},
{
"epoch": 0.11,
"learning_rate": 4.845336967835226e-05,
"loss": 0.1819,
"step": 460
},
{
"epoch": 0.11,
"learning_rate": 4.8386134734177225e-05,
"loss": 0.1856,
"step": 470
},
{
"epoch": 0.12,
"learning_rate": 4.831751798494123e-05,
"loss": 0.1685,
"step": 480
},
{
"epoch": 0.12,
"learning_rate": 4.824752348496844e-05,
"loss": 0.1679,
"step": 490
},
{
"epoch": 0.12,
"learning_rate": 4.8176155369989505e-05,
"loss": 0.1788,
"step": 500
},
{
"epoch": 0.12,
"learning_rate": 4.8103417856897206e-05,
"loss": 0.1872,
"step": 510
},
{
"epoch": 0.13,
"learning_rate": 4.8029315243497244e-05,
"loss": 0.1697,
"step": 520
},
{
"epoch": 0.13,
"learning_rate": 4.7953851908254376e-05,
"loss": 0.162,
"step": 530
},
{
"epoch": 0.13,
"learning_rate": 4.787703231003363e-05,
"loss": 0.1718,
"step": 540
},
{
"epoch": 0.13,
"learning_rate": 4.77988609878369e-05,
"loss": 0.1866,
"step": 550
},
{
"epoch": 0.14,
"learning_rate": 4.771934256053471e-05,
"loss": 0.1609,
"step": 560
},
{
"epoch": 0.14,
"learning_rate": 4.763848172659333e-05,
"loss": 0.1629,
"step": 570
},
{
"epoch": 0.14,
"learning_rate": 4.755628326379716e-05,
"loss": 0.1947,
"step": 580
},
{
"epoch": 0.14,
"learning_rate": 4.747275202896642e-05,
"loss": 0.1813,
"step": 590
},
{
"epoch": 0.15,
"learning_rate": 4.738789295767016e-05,
"loss": 0.1672,
"step": 600
},
{
"epoch": 0.15,
"learning_rate": 4.730171106393466e-05,
"loss": 0.1597,
"step": 610
},
{
"epoch": 0.15,
"learning_rate": 4.7214211439947174e-05,
"loss": 0.1855,
"step": 620
},
{
"epoch": 0.15,
"learning_rate": 4.712539925575502e-05,
"loss": 0.1729,
"step": 630
},
{
"epoch": 0.16,
"learning_rate": 4.7035279758960104e-05,
"loss": 0.1771,
"step": 640
},
{
"epoch": 0.16,
"learning_rate": 4.6943858274408915e-05,
"loss": 0.1891,
"step": 650
},
{
"epoch": 0.16,
"learning_rate": 4.68511402038778e-05,
"loss": 0.1442,
"step": 660
},
{
"epoch": 0.16,
"learning_rate": 4.6757131025753886e-05,
"loss": 0.1666,
"step": 670
},
{
"epoch": 0.17,
"learning_rate": 4.666183629471132e-05,
"loss": 0.1672,
"step": 680
},
{
"epoch": 0.17,
"learning_rate": 4.656526164138307e-05,
"loss": 0.1525,
"step": 690
},
{
"epoch": 0.17,
"learning_rate": 4.646741277202829e-05,
"loss": 0.1758,
"step": 700
},
{
"epoch": 0.17,
"learning_rate": 4.6368295468195064e-05,
"loss": 0.1708,
"step": 710
},
{
"epoch": 0.18,
"learning_rate": 4.626791558637887e-05,
"loss": 0.1682,
"step": 720
},
{
"epoch": 0.18,
"learning_rate": 4.6166279057676494e-05,
"loss": 0.1515,
"step": 730
},
{
"epoch": 0.18,
"learning_rate": 4.6063391887435605e-05,
"loss": 0.1687,
"step": 740
},
{
"epoch": 0.18,
"learning_rate": 4.595926015489991e-05,
"loss": 0.1641,
"step": 750
},
{
"epoch": 0.19,
"learning_rate": 4.585389001284999e-05,
"loss": 0.1536,
"step": 760
},
{
"epoch": 0.19,
"learning_rate": 4.574728768723965e-05,
"loss": 0.1683,
"step": 770
},
{
"epoch": 0.19,
"learning_rate": 4.563945947682821e-05,
"loss": 0.1464,
"step": 780
},
{
"epoch": 0.19,
"learning_rate": 4.5530411752808166e-05,
"loss": 0.1764,
"step": 790
},
{
"epoch": 0.2,
"learning_rate": 4.542015095842885e-05,
"loss": 0.19,
"step": 800
},
{
"epoch": 0.2,
"learning_rate": 4.5308683608615706e-05,
"loss": 0.1819,
"step": 810
},
{
"epoch": 0.2,
"learning_rate": 4.519601628958529e-05,
"loss": 0.1512,
"step": 820
},
{
"epoch": 0.2,
"learning_rate": 4.508215565845615e-05,
"loss": 0.1689,
"step": 830
},
{
"epoch": 0.21,
"learning_rate": 4.496710844285551e-05,
"loss": 0.1739,
"step": 840
},
{
"epoch": 0.21,
"learning_rate": 4.485088144052172e-05,
"loss": 0.1535,
"step": 850
},
{
"epoch": 0.21,
"learning_rate": 4.4733481518902584e-05,
"loss": 0.142,
"step": 860
},
{
"epoch": 0.21,
"learning_rate": 4.4614915614749645e-05,
"loss": 0.1584,
"step": 870
},
{
"epoch": 0.22,
"learning_rate": 4.449519073370827e-05,
"loss": 0.1483,
"step": 880
},
{
"epoch": 0.22,
"learning_rate": 4.43743139499037e-05,
"loss": 0.1587,
"step": 890
},
{
"epoch": 0.22,
"learning_rate": 4.425229240552313e-05,
"loss": 0.1497,
"step": 900
},
{
"epoch": 0.22,
"learning_rate": 4.4129133310393646e-05,
"loss": 0.1368,
"step": 910
},
{
"epoch": 0.23,
"learning_rate": 4.400484394155622e-05,
"loss": 0.1565,
"step": 920
},
{
"epoch": 0.23,
"learning_rate": 4.3879431642835784e-05,
"loss": 0.1377,
"step": 930
},
{
"epoch": 0.23,
"learning_rate": 4.3752903824407234e-05,
"loss": 0.1755,
"step": 940
},
{
"epoch": 0.23,
"learning_rate": 4.3625267962357676e-05,
"loss": 0.165,
"step": 950
},
{
"epoch": 0.23,
"learning_rate": 4.3496531598244616e-05,
"loss": 0.1397,
"step": 960
},
{
"epoch": 0.24,
"learning_rate": 4.33667023386504e-05,
"loss": 0.1518,
"step": 970
},
{
"epoch": 0.24,
"learning_rate": 4.323578785473272e-05,
"loss": 0.1332,
"step": 980
},
{
"epoch": 0.24,
"learning_rate": 4.3103795881771454e-05,
"loss": 0.1541,
"step": 990
},
{
"epoch": 0.24,
"learning_rate": 4.2970734218711474e-05,
"loss": 0.1508,
"step": 1000
},
{
"epoch": 0.25,
"learning_rate": 4.283661072770193e-05,
"loss": 0.1465,
"step": 1010
},
{
"epoch": 0.25,
"learning_rate": 4.270143333363171e-05,
"loss": 0.1505,
"step": 1020
},
{
"epoch": 0.25,
"learning_rate": 4.2565210023661104e-05,
"loss": 0.1674,
"step": 1030
},
{
"epoch": 0.25,
"learning_rate": 4.242794884674996e-05,
"loss": 0.1352,
"step": 1040
},
{
"epoch": 0.26,
"learning_rate": 4.2289657913182045e-05,
"loss": 0.1667,
"step": 1050
},
{
"epoch": 0.26,
"learning_rate": 4.2150345394085845e-05,
"loss": 0.1495,
"step": 1060
},
{
"epoch": 0.26,
"learning_rate": 4.201001952095178e-05,
"loss": 0.1538,
"step": 1070
},
{
"epoch": 0.26,
"learning_rate": 4.186868858514582e-05,
"loss": 0.1425,
"step": 1080
},
{
"epoch": 0.27,
"learning_rate": 4.172636093741959e-05,
"loss": 0.1357,
"step": 1090
},
{
"epoch": 0.27,
"learning_rate": 4.158304498741691e-05,
"loss": 0.1493,
"step": 1100
},
{
"epoch": 0.27,
"learning_rate": 4.143874920317696e-05,
"loss": 0.1439,
"step": 1110
},
{
"epoch": 0.27,
"learning_rate": 4.1293482110633905e-05,
"loss": 0.1449,
"step": 1120
},
{
"epoch": 0.28,
"learning_rate": 4.114725229311311e-05,
"loss": 0.1451,
"step": 1130
},
{
"epoch": 0.28,
"learning_rate": 4.100006839082402e-05,
"loss": 0.1559,
"step": 1140
},
{
"epoch": 0.28,
"learning_rate": 4.085193910034959e-05,
"loss": 0.1452,
"step": 1150
},
{
"epoch": 0.28,
"learning_rate": 4.0702873174132483e-05,
"loss": 0.1584,
"step": 1160
},
{
"epoch": 0.29,
"learning_rate": 4.0552879419957904e-05,
"loss": 0.1341,
"step": 1170
},
{
"epoch": 0.29,
"learning_rate": 4.0401966700433144e-05,
"loss": 0.1433,
"step": 1180
},
{
"epoch": 0.29,
"learning_rate": 4.025014393246399e-05,
"loss": 0.1583,
"step": 1190
},
{
"epoch": 0.29,
"learning_rate": 4.0097420086727766e-05,
"loss": 0.1508,
"step": 1200
},
{
"epoch": 0.3,
"learning_rate": 3.9943804187143384e-05,
"loss": 0.1577,
"step": 1210
},
{
"epoch": 0.3,
"learning_rate": 3.978930531033807e-05,
"loss": 0.1431,
"step": 1220
},
{
"epoch": 0.3,
"learning_rate": 3.963393258511109e-05,
"loss": 0.161,
"step": 1230
},
{
"epoch": 0.3,
"learning_rate": 3.9477695191894395e-05,
"loss": 0.1396,
"step": 1240
},
{
"epoch": 0.31,
"learning_rate": 3.9320602362210116e-05,
"loss": 0.1477,
"step": 1250
},
{
"epoch": 0.31,
"learning_rate": 3.9162663378125166e-05,
"loss": 0.1461,
"step": 1260
},
{
"epoch": 0.31,
"learning_rate": 3.900388757170274e-05,
"loss": 0.152,
"step": 1270
},
{
"epoch": 0.31,
"learning_rate": 3.8844284324450956e-05,
"loss": 0.1563,
"step": 1280
},
{
"epoch": 0.32,
"learning_rate": 3.8683863066768534e-05,
"loss": 0.1441,
"step": 1290
},
{
"epoch": 0.32,
"learning_rate": 3.8522633277387555e-05,
"loss": 0.1448,
"step": 1300
},
{
"epoch": 0.32,
"learning_rate": 3.836060448281342e-05,
"loss": 0.1659,
"step": 1310
},
{
"epoch": 0.32,
"learning_rate": 3.819778625676197e-05,
"loss": 0.1464,
"step": 1320
},
{
"epoch": 0.33,
"learning_rate": 3.803418821959379e-05,
"loss": 0.147,
"step": 1330
},
{
"epoch": 0.33,
"learning_rate": 3.7869820037745776e-05,
"loss": 0.1479,
"step": 1340
},
{
"epoch": 0.33,
"learning_rate": 3.770469142315997e-05,
"loss": 0.1352,
"step": 1350
},
{
"epoch": 0.33,
"learning_rate": 3.7538812132709775e-05,
"loss": 0.1638,
"step": 1360
},
{
"epoch": 0.34,
"learning_rate": 3.737219196762337e-05,
"loss": 0.149,
"step": 1370
},
{
"epoch": 0.34,
"learning_rate": 3.7204840772904645e-05,
"loss": 0.155,
"step": 1380
},
{
"epoch": 0.34,
"learning_rate": 3.703676843675147e-05,
"loss": 0.159,
"step": 1390
},
{
"epoch": 0.34,
"learning_rate": 3.686798488997146e-05,
"loss": 0.1455,
"step": 1400
},
{
"epoch": 0.34,
"learning_rate": 3.669850010539518e-05,
"loss": 0.1439,
"step": 1410
},
{
"epoch": 0.35,
"learning_rate": 3.652832409728686e-05,
"loss": 0.1344,
"step": 1420
},
{
"epoch": 0.35,
"learning_rate": 3.635746692075276e-05,
"loss": 0.1519,
"step": 1430
},
{
"epoch": 0.35,
"learning_rate": 3.6185938671146975e-05,
"loss": 0.1469,
"step": 1440
},
{
"epoch": 0.35,
"learning_rate": 3.601374948347498e-05,
"loss": 0.1343,
"step": 1450
},
{
"epoch": 0.36,
"learning_rate": 3.584090953179475e-05,
"loss": 0.143,
"step": 1460
},
{
"epoch": 0.36,
"learning_rate": 3.566742902861566e-05,
"loss": 0.137,
"step": 1470
},
{
"epoch": 0.36,
"learning_rate": 3.549331822429503e-05,
"loss": 0.1485,
"step": 1480
},
{
"epoch": 0.36,
"learning_rate": 3.531858740643247e-05,
"loss": 0.1182,
"step": 1490
},
{
"epoch": 0.37,
"learning_rate": 3.5143246899262014e-05,
"loss": 0.1528,
"step": 1500
},
{
"epoch": 0.37,
"learning_rate": 3.4967307063042134e-05,
"loss": 0.1648,
"step": 1510
},
{
"epoch": 0.37,
"learning_rate": 3.479077829344354e-05,
"loss": 0.1213,
"step": 1520
},
{
"epoch": 0.37,
"learning_rate": 3.461367102093498e-05,
"loss": 0.1412,
"step": 1530
},
{
"epoch": 0.38,
"learning_rate": 3.443599571016689e-05,
"loss": 0.1397,
"step": 1540
},
{
"epoch": 0.38,
"learning_rate": 3.425776285935315e-05,
"loss": 0.1527,
"step": 1550
},
{
"epoch": 0.38,
"learning_rate": 3.407898299965069e-05,
"loss": 0.137,
"step": 1560
},
{
"epoch": 0.38,
"learning_rate": 3.3899666694537307e-05,
"loss": 0.121,
"step": 1570
},
{
"epoch": 0.39,
"learning_rate": 3.3719824539187484e-05,
"loss": 0.1401,
"step": 1580
},
{
"epoch": 0.39,
"learning_rate": 3.353946715984635e-05,
"loss": 0.1163,
"step": 1590
},
{
"epoch": 0.39,
"learning_rate": 3.3358605213201835e-05,
"loss": 0.1562,
"step": 1600
},
{
"epoch": 0.39,
"learning_rate": 3.3177249385754964e-05,
"loss": 0.1324,
"step": 1610
},
{
"epoch": 0.4,
"learning_rate": 3.299541039318849e-05,
"loss": 0.1586,
"step": 1620
},
{
"epoch": 0.4,
"learning_rate": 3.281309897973368e-05,
"loss": 0.1626,
"step": 1630
},
{
"epoch": 0.4,
"learning_rate": 3.263032591753552e-05,
"loss": 0.1366,
"step": 1640
},
{
"epoch": 0.4,
"learning_rate": 3.2447102006016186e-05,
"loss": 0.1336,
"step": 1650
},
{
"epoch": 0.41,
"learning_rate": 3.226343807123697e-05,
"loss": 0.1376,
"step": 1660
},
{
"epoch": 0.41,
"learning_rate": 3.2079344965258645e-05,
"loss": 0.132,
"step": 1670
},
{
"epoch": 0.41,
"learning_rate": 3.189483356550015e-05,
"loss": 0.1316,
"step": 1680
},
{
"epoch": 0.41,
"learning_rate": 3.1709914774095975e-05,
"loss": 0.1448,
"step": 1690
},
{
"epoch": 0.42,
"learning_rate": 3.152459951725196e-05,
"loss": 0.132,
"step": 1700
},
{
"epoch": 0.42,
"learning_rate": 3.133889874459971e-05,
"loss": 0.133,
"step": 1710
},
{
"epoch": 0.42,
"learning_rate": 3.115282342854959e-05,
"loss": 0.1558,
"step": 1720
},
{
"epoch": 0.42,
"learning_rate": 3.096638456364245e-05,
"loss": 0.152,
"step": 1730
},
{
"epoch": 0.43,
"learning_rate": 3.0779593165899954e-05,
"loss": 0.1293,
"step": 1740
},
{
"epoch": 0.43,
"learning_rate": 3.059246027217372e-05,
"loss": 0.1361,
"step": 1750
},
{
"epoch": 0.43,
"learning_rate": 3.0404996939493156e-05,
"loss": 0.1462,
"step": 1760
},
{
"epoch": 0.43,
"learning_rate": 3.021721424441216e-05,
"loss": 0.1671,
"step": 1770
},
{
"epoch": 0.44,
"learning_rate": 3.0029123282354638e-05,
"loss": 0.1269,
"step": 1780
},
{
"epoch": 0.44,
"learning_rate": 2.984073516695892e-05,
"loss": 0.1571,
"step": 1790
},
{
"epoch": 0.44,
"learning_rate": 2.9652061029421087e-05,
"loss": 0.139,
"step": 1800
},
{
"epoch": 0.44,
"learning_rate": 2.9463112017837262e-05,
"loss": 0.1234,
"step": 1810
},
{
"epoch": 0.45,
"learning_rate": 2.927389929654494e-05,
"loss": 0.1511,
"step": 1820
},
{
"epoch": 0.45,
"learning_rate": 2.9084434045463255e-05,
"loss": 0.1347,
"step": 1830
},
{
"epoch": 0.45,
"learning_rate": 2.8894727459432504e-05,
"loss": 0.1464,
"step": 1840
},
{
"epoch": 0.45,
"learning_rate": 2.870479074755257e-05,
"loss": 0.1446,
"step": 1850
},
{
"epoch": 0.46,
"learning_rate": 2.8514635132520716e-05,
"loss": 0.1371,
"step": 1860
},
{
"epoch": 0.46,
"learning_rate": 2.8324271849968394e-05,
"loss": 0.1345,
"step": 1870
},
{
"epoch": 0.46,
"learning_rate": 2.8133712147797404e-05,
"loss": 0.1396,
"step": 1880
},
{
"epoch": 0.46,
"learning_rate": 2.794296728551533e-05,
"loss": 0.1225,
"step": 1890
},
{
"epoch": 0.46,
"learning_rate": 2.7752048533570186e-05,
"loss": 0.1352,
"step": 1900
},
{
"epoch": 0.47,
"learning_rate": 2.756096717268453e-05,
"loss": 0.146,
"step": 1910
},
{
"epoch": 0.47,
"learning_rate": 2.7369734493188924e-05,
"loss": 0.1426,
"step": 1920
},
{
"epoch": 0.47,
"learning_rate": 2.71783617943548e-05,
"loss": 0.1494,
"step": 1930
},
{
"epoch": 0.47,
"learning_rate": 2.6986860383726865e-05,
"loss": 0.1098,
"step": 1940
},
{
"epoch": 0.48,
"learning_rate": 2.679524157645494e-05,
"loss": 0.1436,
"step": 1950
},
{
"epoch": 0.48,
"learning_rate": 2.660351669462541e-05,
"loss": 0.1269,
"step": 1960
},
{
"epoch": 0.48,
"learning_rate": 2.6411697066592228e-05,
"loss": 0.1283,
"step": 1970
},
{
"epoch": 0.48,
"learning_rate": 2.62197940263076e-05,
"loss": 0.1379,
"step": 1980
},
{
"epoch": 0.49,
"learning_rate": 2.6027818912652218e-05,
"loss": 0.135,
"step": 1990
},
{
"epoch": 0.49,
"learning_rate": 2.5835783068765378e-05,
"loss": 0.1365,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 2.564369784137472e-05,
"loss": 0.1235,
"step": 2010
},
{
"epoch": 0.49,
"learning_rate": 2.5451574580125738e-05,
"loss": 0.1514,
"step": 2020
},
{
"epoch": 0.5,
"learning_rate": 2.5259424636911256e-05,
"loss": 0.1358,
"step": 2030
},
{
"epoch": 0.5,
"learning_rate": 2.5067259365200624e-05,
"loss": 0.1444,
"step": 2040
},
{
"epoch": 0.5,
"learning_rate": 2.4875090119368905e-05,
"loss": 0.1254,
"step": 2050
},
{
"epoch": 0.5,
"learning_rate": 2.468292825402597e-05,
"loss": 0.1247,
"step": 2060
},
{
"epoch": 0.51,
"learning_rate": 2.449078512334562e-05,
"loss": 0.1293,
"step": 2070
},
{
"epoch": 0.51,
"learning_rate": 2.429867208039467e-05,
"loss": 0.126,
"step": 2080
},
{
"epoch": 0.51,
"learning_rate": 2.4106600476462173e-05,
"loss": 0.1351,
"step": 2090
},
{
"epoch": 0.51,
"learning_rate": 2.3914581660388683e-05,
"loss": 0.1386,
"step": 2100
},
{
"epoch": 0.52,
"learning_rate": 2.3722626977895706e-05,
"loss": 0.1294,
"step": 2110
},
{
"epoch": 0.52,
"learning_rate": 2.3530747770915328e-05,
"loss": 0.1188,
"step": 2120
},
{
"epoch": 0.52,
"learning_rate": 2.3338955376920034e-05,
"loss": 0.1283,
"step": 2130
},
{
"epoch": 0.52,
"learning_rate": 2.3147261128252858e-05,
"loss": 0.1239,
"step": 2140
},
{
"epoch": 0.53,
"learning_rate": 2.295567635145774e-05,
"loss": 0.1258,
"step": 2150
},
{
"epoch": 0.53,
"learning_rate": 2.276421236661032e-05,
"loss": 0.145,
"step": 2160
},
{
"epoch": 0.53,
"learning_rate": 2.2572880486649086e-05,
"loss": 0.1232,
"step": 2170
},
{
"epoch": 0.53,
"learning_rate": 2.2381692016706862e-05,
"loss": 0.14,
"step": 2180
},
{
"epoch": 0.54,
"learning_rate": 2.219065825344292e-05,
"loss": 0.1594,
"step": 2190
},
{
"epoch": 0.54,
"learning_rate": 2.1999790484375433e-05,
"loss": 0.1475,
"step": 2200
},
{
"epoch": 0.54,
"learning_rate": 2.1809099987214567e-05,
"loss": 0.1176,
"step": 2210
},
{
"epoch": 0.54,
"learning_rate": 2.1618598029196082e-05,
"loss": 0.1317,
"step": 2220
},
{
"epoch": 0.55,
"learning_rate": 2.142829586641566e-05,
"loss": 0.1318,
"step": 2230
},
{
"epoch": 0.55,
"learning_rate": 2.1238204743163764e-05,
"loss": 0.1453,
"step": 2240
},
{
"epoch": 0.55,
"learning_rate": 2.104833589126124e-05,
"loss": 0.1297,
"step": 2250
},
{
"epoch": 0.55,
"learning_rate": 2.085870052939572e-05,
"loss": 0.1393,
"step": 2260
},
{
"epoch": 0.56,
"learning_rate": 2.0669309862458756e-05,
"loss": 0.1364,
"step": 2270
},
{
"epoch": 0.56,
"learning_rate": 2.0480175080883663e-05,
"loss": 0.1246,
"step": 2280
},
{
"epoch": 0.56,
"learning_rate": 2.0291307359984435e-05,
"loss": 0.1437,
"step": 2290
},
{
"epoch": 0.56,
"learning_rate": 2.0102717859295365e-05,
"loss": 0.1268,
"step": 2300
},
{
"epoch": 0.57,
"learning_rate": 1.9914417721911687e-05,
"loss": 0.1299,
"step": 2310
},
{
"epoch": 0.57,
"learning_rate": 1.9726418073831167e-05,
"loss": 0.1334,
"step": 2320
},
{
"epoch": 0.57,
"learning_rate": 1.9538730023296708e-05,
"loss": 0.138,
"step": 2330
},
{
"epoch": 0.57,
"learning_rate": 1.9351364660140004e-05,
"loss": 0.1247,
"step": 2340
},
{
"epoch": 0.57,
"learning_rate": 1.9164333055126265e-05,
"loss": 0.1218,
"step": 2350
},
{
"epoch": 0.58,
"learning_rate": 1.897764625930012e-05,
"loss": 0.1391,
"step": 2360
},
{
"epoch": 0.58,
"learning_rate": 1.879131530333261e-05,
"loss": 0.1405,
"step": 2370
},
{
"epoch": 0.58,
"learning_rate": 1.8605351196869453e-05,
"loss": 0.1454,
"step": 2380
},
{
"epoch": 0.58,
"learning_rate": 1.8419764927880488e-05,
"loss": 0.1404,
"step": 2390
},
{
"epoch": 0.59,
"learning_rate": 1.8234567462010482e-05,
"loss": 0.1277,
"step": 2400
},
{
"epoch": 0.59,
"learning_rate": 1.8049769741931178e-05,
"loss": 0.1298,
"step": 2410
},
{
"epoch": 0.59,
"learning_rate": 1.786538268669472e-05,
"loss": 0.1378,
"step": 2420
},
{
"epoch": 0.59,
"learning_rate": 1.768141719108852e-05,
"loss": 0.1336,
"step": 2430
},
{
"epoch": 0.6,
"learning_rate": 1.749788412499149e-05,
"loss": 0.131,
"step": 2440
},
{
"epoch": 0.6,
"learning_rate": 1.7314794332731786e-05,
"loss": 0.1187,
"step": 2450
},
{
"epoch": 0.6,
"learning_rate": 1.7132158632446085e-05,
"loss": 0.1291,
"step": 2460
},
{
"epoch": 0.6,
"learning_rate": 1.6949987815440332e-05,
"loss": 0.1277,
"step": 2470
},
{
"epoch": 0.61,
"learning_rate": 1.676829264555216e-05,
"loss": 0.1419,
"step": 2480
},
{
"epoch": 0.61,
"learning_rate": 1.658708385851485e-05,
"loss": 0.128,
"step": 2490
},
{
"epoch": 0.61,
"learning_rate": 1.6406372161323035e-05,
"loss": 0.1213,
"step": 2500
},
{
"epoch": 0.61,
"learning_rate": 1.6226168231600058e-05,
"loss": 0.1436,
"step": 2510
},
{
"epoch": 0.62,
"learning_rate": 1.604648271696701e-05,
"loss": 0.1297,
"step": 2520
},
{
"epoch": 0.62,
"learning_rate": 1.5867326234413692e-05,
"loss": 0.1481,
"step": 2530
},
{
"epoch": 0.62,
"learning_rate": 1.5688709369671222e-05,
"loss": 0.1237,
"step": 2540
},
{
"epoch": 0.62,
"learning_rate": 1.5510642676586606e-05,
"loss": 0.1342,
"step": 2550
},
{
"epoch": 0.63,
"learning_rate": 1.5333136676499078e-05,
"loss": 0.1265,
"step": 2560
},
{
"epoch": 0.63,
"learning_rate": 1.5156201857618532e-05,
"loss": 0.1294,
"step": 2570
},
{
"epoch": 0.63,
"learning_rate": 1.4979848674405755e-05,
"loss": 0.1218,
"step": 2580
},
{
"epoch": 0.63,
"learning_rate": 1.480408754695467e-05,
"loss": 0.1377,
"step": 2590
},
{
"epoch": 0.64,
"learning_rate": 1.4628928860376733e-05,
"loss": 0.1367,
"step": 2600
},
{
"epoch": 0.64,
"learning_rate": 1.4454382964187263e-05,
"loss": 0.1338,
"step": 2610
},
{
"epoch": 0.64,
"learning_rate": 1.4280460171693924e-05,
"loss": 0.1357,
"step": 2620
},
{
"epoch": 0.64,
"learning_rate": 1.4107170759387364e-05,
"loss": 0.1308,
"step": 2630
},
{
"epoch": 0.65,
"learning_rate": 1.3934524966334012e-05,
"loss": 0.1466,
"step": 2640
},
{
"epoch": 0.65,
"learning_rate": 1.3762532993571079e-05,
"loss": 0.1371,
"step": 2650
},
{
"epoch": 0.65,
"learning_rate": 1.35912050035038e-05,
"loss": 0.1357,
"step": 2660
},
{
"epoch": 0.65,
"learning_rate": 1.342055111930502e-05,
"loss": 0.1208,
"step": 2670
},
{
"epoch": 0.66,
"learning_rate": 1.325058142431701e-05,
"loss": 0.1168,
"step": 2680
},
{
"epoch": 0.66,
"learning_rate": 1.3081305961455658e-05,
"loss": 0.121,
"step": 2690
},
{
"epoch": 0.66,
"learning_rate": 1.291273473261716e-05,
"loss": 0.1215,
"step": 2700
},
{
"epoch": 0.66,
"learning_rate": 1.2744877698086943e-05,
"loss": 0.1302,
"step": 2710
},
{
"epoch": 0.67,
"learning_rate": 1.2577744775951194e-05,
"loss": 0.1381,
"step": 2720
},
{
"epoch": 0.67,
"learning_rate": 1.2411345841510808e-05,
"loss": 0.1398,
"step": 2730
},
{
"epoch": 0.67,
"learning_rate": 1.2245690726697939e-05,
"loss": 0.1279,
"step": 2740
},
{
"epoch": 0.67,
"learning_rate": 1.2080789219495028e-05,
"loss": 0.1419,
"step": 2750
},
{
"epoch": 0.68,
"learning_rate": 1.1916651063356455e-05,
"loss": 0.1466,
"step": 2760
},
{
"epoch": 0.68,
"learning_rate": 1.1753285956632867e-05,
"loss": 0.1237,
"step": 2770
},
{
"epoch": 0.68,
"learning_rate": 1.1590703551998111e-05,
"loss": 0.1248,
"step": 2780
},
{
"epoch": 0.68,
"learning_rate": 1.1428913455878932e-05,
"loss": 0.1256,
"step": 2790
},
{
"epoch": 0.69,
"learning_rate": 1.1267925227887275e-05,
"loss": 0.1272,
"step": 2800
},
{
"epoch": 0.69,
"learning_rate": 1.1107748380255536e-05,
"loss": 0.1325,
"step": 2810
},
{
"epoch": 0.69,
"learning_rate": 1.0948392377274477e-05,
"loss": 0.1099,
"step": 2820
},
{
"epoch": 0.69,
"learning_rate": 1.0789866634733988e-05,
"loss": 0.1418,
"step": 2830
},
{
"epoch": 0.69,
"learning_rate": 1.0632180519366796e-05,
"loss": 0.1202,
"step": 2840
},
{
"epoch": 0.7,
"learning_rate": 1.0475343348294978e-05,
"loss": 0.1304,
"step": 2850
},
{
"epoch": 0.7,
"learning_rate": 1.0319364388479455e-05,
"loss": 0.15,
"step": 2860
},
{
"epoch": 0.7,
"learning_rate": 1.0164252856172445e-05,
"loss": 0.1306,
"step": 2870
},
{
"epoch": 0.7,
"learning_rate": 1.0010017916372908e-05,
"loss": 0.1394,
"step": 2880
},
{
"epoch": 0.71,
"learning_rate": 9.856668682285003e-06,
"loss": 0.1545,
"step": 2890
},
{
"epoch": 0.71,
"learning_rate": 9.704214214779625e-06,
"loss": 0.1261,
"step": 2900
},
{
"epoch": 0.71,
"learning_rate": 9.552663521859048e-06,
"loss": 0.1222,
"step": 2910
},
{
"epoch": 0.71,
"learning_rate": 9.402025558124658e-06,
"loss": 0.1253,
"step": 2920
},
{
"epoch": 0.72,
"learning_rate": 9.25230922424786e-06,
"loss": 0.1418,
"step": 2930
},
{
"epoch": 0.72,
"learning_rate": 9.103523366444164e-06,
"loss": 0.1211,
"step": 2940
},
{
"epoch": 0.72,
"learning_rate": 8.9556767759505e-06,
"loss": 0.1251,
"step": 2950
},
{
"epoch": 0.72,
"learning_rate": 8.808778188505779e-06,
"loss": 0.1331,
"step": 2960
},
{
"epoch": 0.73,
"learning_rate": 8.6628362838347e-06,
"loss": 0.15,
"step": 2970
},
{
"epoch": 0.73,
"learning_rate": 8.517859685134932e-06,
"loss": 0.1152,
"step": 2980
},
{
"epoch": 0.73,
"learning_rate": 8.373856958567589e-06,
"loss": 0.1244,
"step": 2990
},
{
"epoch": 0.73,
"learning_rate": 8.230836612751069e-06,
"loss": 0.1491,
"step": 3000
},
{
"epoch": 0.74,
"learning_rate": 8.088807098258328e-06,
"loss": 0.1387,
"step": 3010
},
{
"epoch": 0.74,
"learning_rate": 7.94777680711756e-06,
"loss": 0.1497,
"step": 3020
},
{
"epoch": 0.74,
"learning_rate": 7.807754072316323e-06,
"loss": 0.1296,
"step": 3030
},
{
"epoch": 0.74,
"learning_rate": 7.668747167309209e-06,
"loss": 0.1278,
"step": 3040
},
{
"epoch": 0.75,
"learning_rate": 7.530764305528959e-06,
"loss": 0.1334,
"step": 3050
},
{
"epoch": 0.75,
"learning_rate": 7.393813639901195e-06,
"loss": 0.1314,
"step": 3060
},
{
"epoch": 0.75,
"learning_rate": 7.257903262362637e-06,
"loss": 0.1251,
"step": 3070
},
{
"epoch": 0.75,
"learning_rate": 7.123041203383032e-06,
"loss": 0.1127,
"step": 3080
},
{
"epoch": 0.76,
"learning_rate": 6.989235431490676e-06,
"loss": 0.1363,
"step": 3090
},
{
"epoch": 0.76,
"learning_rate": 6.856493852801485e-06,
"loss": 0.1184,
"step": 3100
},
{
"epoch": 0.76,
"learning_rate": 6.724824310551961e-06,
"loss": 0.1475,
"step": 3110
},
{
"epoch": 0.76,
"learning_rate": 6.594234584635694e-06,
"loss": 0.1336,
"step": 3120
},
{
"epoch": 0.77,
"learning_rate": 6.464732391143713e-06,
"loss": 0.127,
"step": 3130
},
{
"epoch": 0.77,
"learning_rate": 6.336325381908529e-06,
"loss": 0.1315,
"step": 3140
},
{
"epoch": 0.77,
"learning_rate": 6.209021144052054e-06,
"loss": 0.1257,
"step": 3150
},
{
"epoch": 0.77,
"learning_rate": 6.082827199537295e-06,
"loss": 0.1356,
"step": 3160
},
{
"epoch": 0.78,
"learning_rate": 5.9577510047238825e-06,
"loss": 0.1306,
"step": 3170
},
{
"epoch": 0.78,
"learning_rate": 5.833799949927537e-06,
"loss": 0.1163,
"step": 3180
},
{
"epoch": 0.78,
"learning_rate": 5.710981358983384e-06,
"loss": 0.1333,
"step": 3190
},
{
"epoch": 0.78,
"learning_rate": 5.589302488813186e-06,
"loss": 0.139,
"step": 3200
},
{
"epoch": 0.79,
"learning_rate": 5.468770528996614e-06,
"loss": 0.1261,
"step": 3210
},
{
"epoch": 0.79,
"learning_rate": 5.349392601346398e-06,
"loss": 0.1274,
"step": 3220
},
{
"epoch": 0.79,
"learning_rate": 5.231175759487536e-06,
"loss": 0.1136,
"step": 3230
},
{
"epoch": 0.79,
"learning_rate": 5.114126988440523e-06,
"loss": 0.1289,
"step": 3240
},
{
"epoch": 0.8,
"learning_rate": 4.998253204208625e-06,
"loss": 0.1462,
"step": 3250
},
{
"epoch": 0.8,
"learning_rate": 4.8835612533692455e-06,
"loss": 0.1209,
"step": 3260
},
{
"epoch": 0.8,
"learning_rate": 4.770057912669362e-06,
"loss": 0.1266,
"step": 3270
},
{
"epoch": 0.8,
"learning_rate": 4.65774988862514e-06,
"loss": 0.1487,
"step": 3280
},
{
"epoch": 0.8,
"learning_rate": 4.546643817125662e-06,
"loss": 0.1101,
"step": 3290
},
{
"epoch": 0.81,
"learning_rate": 4.436746263040819e-06,
"loss": 0.1183,
"step": 3300
},
{
"epoch": 0.81,
"learning_rate": 4.328063719833433e-06,
"loss": 0.1133,
"step": 3310
},
{
"epoch": 0.81,
"learning_rate": 4.220602609175575e-06,
"loss": 0.1159,
"step": 3320
},
{
"epoch": 0.81,
"learning_rate": 4.114369280569136e-06,
"loss": 0.1288,
"step": 3330
},
{
"epoch": 0.82,
"learning_rate": 4.009370010970634e-06,
"loss": 0.1222,
"step": 3340
},
{
"epoch": 0.82,
"learning_rate": 3.90561100442036e-06,
"loss": 0.1208,
"step": 3350
},
{
"epoch": 0.82,
"learning_rate": 3.8030983916757966e-06,
"loss": 0.1096,
"step": 3360
},
{
"epoch": 0.82,
"learning_rate": 3.7018382298493635e-06,
"loss": 0.1157,
"step": 3370
},
{
"epoch": 0.83,
"learning_rate": 3.6018365020505124e-06,
"loss": 0.1264,
"step": 3380
},
{
"epoch": 0.83,
"learning_rate": 3.503099117032249e-06,
"loss": 0.1315,
"step": 3390
},
{
"epoch": 0.83,
"learning_rate": 3.405631908841966e-06,
"loss": 0.1317,
"step": 3400
},
{
"epoch": 0.83,
"learning_rate": 3.3094406364767267e-06,
"loss": 0.1382,
"step": 3410
},
{
"epoch": 0.84,
"learning_rate": 3.214530983543021e-06,
"loss": 0.1366,
"step": 3420
},
{
"epoch": 0.84,
"learning_rate": 3.120908557920918e-06,
"loss": 0.1308,
"step": 3430
},
{
"epoch": 0.84,
"learning_rate": 3.028578891432701e-06,
"loss": 0.1448,
"step": 3440
},
{
"epoch": 0.84,
"learning_rate": 2.9375474395160528e-06,
"loss": 0.1292,
"step": 3450
},
{
"epoch": 0.85,
"learning_rate": 2.8478195809016674e-06,
"loss": 0.13,
"step": 3460
},
{
"epoch": 0.85,
"learning_rate": 2.7594006172954906e-06,
"loss": 0.1333,
"step": 3470
},
{
"epoch": 0.85,
"learning_rate": 2.6722957730653973e-06,
"loss": 0.1527,
"step": 3480
},
{
"epoch": 0.85,
"learning_rate": 2.5865101949325567e-06,
"loss": 0.1154,
"step": 3490
},
{
"epoch": 0.86,
"learning_rate": 2.50204895166731e-06,
"loss": 0.1247,
"step": 3500
},
{
"epoch": 0.86,
"learning_rate": 2.418917033789661e-06,
"loss": 0.1211,
"step": 3510
},
{
"epoch": 0.86,
"learning_rate": 2.3371193532744305e-06,
"loss": 0.1208,
"step": 3520
},
{
"epoch": 0.86,
"learning_rate": 2.2566607432610047e-06,
"loss": 0.1208,
"step": 3530
},
{
"epoch": 0.87,
"learning_rate": 2.177545957767771e-06,
"loss": 0.1251,
"step": 3540
},
{
"epoch": 0.87,
"learning_rate": 2.0997796714112144e-06,
"loss": 0.1175,
"step": 3550
},
{
"epoch": 0.87,
"learning_rate": 2.023366479129718e-06,
"loss": 0.1207,
"step": 3560
},
{
"epoch": 0.87,
"learning_rate": 1.9483108959120587e-06,
"loss": 0.1198,
"step": 3570
},
{
"epoch": 0.88,
"learning_rate": 1.874617356530628e-06,
"loss": 0.113,
"step": 3580
},
{
"epoch": 0.88,
"learning_rate": 1.8022902152794036e-06,
"loss": 0.1261,
"step": 3590
},
{
"epoch": 0.88,
"learning_rate": 1.7313337457166707e-06,
"loss": 0.1297,
"step": 3600
},
{
"epoch": 0.88,
"learning_rate": 1.661752140412512e-06,
"loss": 0.1197,
"step": 3610
},
{
"epoch": 0.89,
"learning_rate": 1.5935495107010728e-06,
"loss": 0.133,
"step": 3620
},
{
"epoch": 0.89,
"learning_rate": 1.5267298864376527e-06,
"loss": 0.1245,
"step": 3630
},
{
"epoch": 0.89,
"learning_rate": 1.461297215760582e-06,
"loss": 0.1337,
"step": 3640
},
{
"epoch": 0.89,
"learning_rate": 1.3972553648579468e-06,
"loss": 0.1437,
"step": 3650
},
{
"epoch": 0.9,
"learning_rate": 1.3346081177391472e-06,
"loss": 0.1467,
"step": 3660
},
{
"epoch": 0.9,
"learning_rate": 1.2733591760113184e-06,
"loss": 0.1298,
"step": 3670
},
{
"epoch": 0.9,
"learning_rate": 1.2135121586606014e-06,
"loss": 0.1239,
"step": 3680
},
{
"epoch": 0.9,
"learning_rate": 1.1550706018383345e-06,
"loss": 0.1252,
"step": 3690
},
{
"epoch": 0.91,
"learning_rate": 1.09803795865209e-06,
"loss": 0.1475,
"step": 3700
},
{
"epoch": 0.91,
"learning_rate": 1.0424175989616563e-06,
"loss": 0.1365,
"step": 3710
},
{
"epoch": 0.91,
"learning_rate": 9.882128091799114e-07,
"loss": 0.127,
"step": 3720
},
{
"epoch": 0.91,
"learning_rate": 9.354267920786614e-07,
"loss": 0.1281,
"step": 3730
},
{
"epoch": 0.92,
"learning_rate": 8.840626665993807e-07,
"loss": 0.1219,
"step": 3740
},
{
"epoch": 0.92,
"learning_rate": 8.341234676689319e-07,
"loss": 0.1227,
"step": 3750
},
{
"epoch": 0.92,
"learning_rate": 7.856121460202398e-07,
"loss": 0.1568,
"step": 3760
},
{
"epoch": 0.92,
"learning_rate": 7.385315680179583e-07,
"loss": 0.1202,
"step": 3770
},
{
"epoch": 0.92,
"learning_rate": 6.928845154890757e-07,
"loss": 0.1374,
"step": 3780
},
{
"epoch": 0.93,
"learning_rate": 6.486736855585762e-07,
"loss": 0.1235,
"step": 3790
},
{
"epoch": 0.93,
"learning_rate": 6.059016904900628e-07,
"loss": 0.1252,
"step": 3800
},
{
"epoch": 0.93,
"learning_rate": 5.645710575314073e-07,
"loss": 0.1261,
"step": 3810
},
{
"epoch": 0.93,
"learning_rate": 5.246842287654235e-07,
"loss": 0.1338,
"step": 3820
},
{
"epoch": 0.94,
"learning_rate": 4.862435609655824e-07,
"loss": 0.1345,
"step": 3830
},
{
"epoch": 0.94,
"learning_rate": 4.492513254567482e-07,
"loss": 0.1164,
"step": 3840
},
{
"epoch": 0.94,
"learning_rate": 4.137097079809776e-07,
"loss": 0.1212,
"step": 3850
},
{
"epoch": 0.94,
"learning_rate": 3.796208085683733e-07,
"loss": 0.1289,
"step": 3860
},
{
"epoch": 0.95,
"learning_rate": 3.4698664141299686e-07,
"loss": 0.1412,
"step": 3870
},
{
"epoch": 0.95,
"learning_rate": 3.1580913475386107e-07,
"loss": 0.1293,
"step": 3880
},
{
"epoch": 0.95,
"learning_rate": 2.8609013076099365e-07,
"loss": 0.1191,
"step": 3890
},
{
"epoch": 0.95,
"learning_rate": 2.5783138542659345e-07,
"loss": 0.1247,
"step": 3900
},
{
"epoch": 0.96,
"learning_rate": 2.3103456846126913e-07,
"loss": 0.1175,
"step": 3910
},
{
"epoch": 0.96,
"learning_rate": 2.057012631953903e-07,
"loss": 0.1317,
"step": 3920
},
{
"epoch": 0.96,
"learning_rate": 1.8183296648552616e-07,
"loss": 0.1421,
"step": 3930
},
{
"epoch": 0.96,
"learning_rate": 1.5943108862600809e-07,
"loss": 0.1365,
"step": 3940
},
{
"epoch": 0.97,
"learning_rate": 1.384969532655933e-07,
"loss": 0.1261,
"step": 3950
},
{
"epoch": 0.97,
"learning_rate": 1.190317973292554e-07,
"loss": 0.1054,
"step": 3960
},
{
"epoch": 0.97,
"learning_rate": 1.0103677094510932e-07,
"loss": 0.1251,
"step": 3970
},
{
"epoch": 0.97,
"learning_rate": 8.451293737644638e-08,
"loss": 0.1446,
"step": 3980
},
{
"epoch": 0.98,
"learning_rate": 6.946127295890392e-08,
"loss": 0.1271,
"step": 3990
},
{
"epoch": 0.98,
"learning_rate": 5.5882667042789214e-08,
"loss": 0.1372,
"step": 4000
},
{
"epoch": 0.98,
"learning_rate": 4.377792194052432e-08,
"loss": 0.1258,
"step": 4010
},
{
"epoch": 0.98,
"learning_rate": 3.314775287923677e-08,
"loss": 0.1268,
"step": 4020
},
{
"epoch": 0.99,
"learning_rate": 2.399278795851001e-08,
"loss": 0.1372,
"step": 4030
},
{
"epoch": 0.99,
"learning_rate": 1.631356811325757e-08,
"loss": 0.1274,
"step": 4040
},
{
"epoch": 0.99,
"learning_rate": 1.0110547081776389e-08,
"loss": 0.1361,
"step": 4050
},
{
"epoch": 0.99,
"learning_rate": 5.384091378923817e-09,
"loss": 0.1202,
"step": 4060
},
{
"epoch": 1.0,
"learning_rate": 2.1344802744627246e-09,
"loss": 0.1315,
"step": 4070
},
{
"epoch": 1.0,
"learning_rate": 3.6190577657468914e-10,
"loss": 0.1349,
"step": 4080
},
{
"epoch": 1.0,
"step": 4087,
"total_flos": 2.507754096179712e+18,
"train_loss": 0.1560091979059696,
"train_runtime": 21225.5051,
"train_samples_per_second": 3.081,
"train_steps_per_second": 0.193
}
],
"logging_steps": 10,
"max_steps": 4087,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 2.507754096179712e+18,
"trial_name": null,
"trial_params": null
}